diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..536443860 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,22 @@ +{ + "name": "Tianshou", + "dockerFile": "../Dockerfile", + "workspaceFolder": "/workspaces/tianshou", + "runArgs": ["--shm-size=1g"], + "customizations": { + "vscode": { + "settings": { + "terminal.integrated.shell.linux": "/bin/bash", + "python.pythonPath": "/usr/local/bin/python" + }, + "extensions": [ + "ms-python.python", + "ms-toolsai.jupyter", + "ms-python.vscode-pylance" + ] + } + }, + "forwardPorts": [], + "postCreateCommand": "poetry install --with dev", + "remoteUser": "root" + } \ No newline at end of file diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..fa5050fe5 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,14 @@ +data +logs +test/log +docs/jupyter_execute +docs/.jupyter_cache +.lsp +.clj-kondo +docs/_build +coverage* +__pycache__ +*.egg-info +*.egg +.*cache +dist \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 16f76f989..4e3f24ece 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -3,6 +3,7 @@ + [ ] RL algorithm bug + [ ] documentation request (i.e. "X is missing from the documentation.") + [ ] new feature request + + [ ] design request (i.e. "X should be changed to Y.") - [ ] I have visited the [source website](https://github.com/thu-ml/tianshou/) - [ ] I have searched through the [issue tracker](https://github.com/thu-ml/tianshou/issues) for duplicates - [ ] I have mentioned version numbers, operating system and environment, where applicable: diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 280583538..364f89649 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,9 +1,10 @@ -- [ ] I have marked all applicable categories: - + [ ] exception-raising fix - + [ ] algorithm implementation fix - + [ ] documentation modification - + [ ] new feature -- [ ] I have reformatted the code using `make format` (**required**) -- [ ] I have checked the code using `make commit-checks` (**required**) -- [ ] If applicable, I have mentioned the relevant/related issue(s) -- [ ] If applicable, I have listed every items in this Pull Request below +- [ ] I have added the correct label(s) to this Pull Request or linked the relevant issue(s) +- [ ] I have provided a description of the changes in this Pull Request +- [ ] I have added documentation for my changes and have listed relevant changes in CHANGELOG.md +- [ ] If applicable, I have added tests to cover my changes. +- [ ] If applicable, I have made sure that the determinism tests run through, meaning that my changes haven't influenced any aspect of training. See info in the contributing documentation. 
+- [ ] I have reformatted the code using `poe format` +- [ ] I have checked style and types with `poe lint` and `poe type-check` +- [ ] (Optional) I ran tests locally with `poe test` +(or a subset of them with `poe test-reduced`) ,and they pass +- [ ] (Optional) I have tested that documentation builds correctly with `poe doc-build` \ No newline at end of file diff --git a/.github/workflows/extra_sys.yml b/.github/workflows/extra_sys.yml index db945cbb3..56775f36a 100644 --- a/.github/workflows/extra_sys.yml +++ b/.github/workflows/extra_sys.yml @@ -1,7 +1,19 @@ name: Windows/MacOS -on: [push, pull_request] - +on: + pull_request: + branches: + - master + push: + branches: + - master + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false jobs: cpu-extra: runs-on: ${{ matrix.os }} @@ -9,8 +21,11 @@ jobs: strategy: matrix: os: [macos-latest, windows-latest] - python-version: [3.7, 3.8] + python-version: [3.11] steps: + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} - name: Cancel previous run uses: styfle/cancel-workflow-action@0.11.0 with: @@ -20,16 +35,23 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Upgrade pip - run: | - python -m pip install --upgrade pip setuptools wheel - - name: Install dependencies + # use poetry and cache installed packages, see https://github.com/marketplace/actions/python-poetry-action + - name: Install poetry + uses: abatilo/actions-poetry@v2 + - name: Setup a local virtual environment (if no poetry.toml file) run: | - python -m pip install ".[dev]" --upgrade - python -m pip uninstall ray -y + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + - uses: actions/cache@v3 + name: Define a cache for the virtual environment based on the dependencies lock file + with: + path: ./.venv + key: venv-${{ hashFiles('poetry.lock') }} + - name: Install the project dependencies + # ugly as hell, but well... 
+ # see https://github.com/python-poetry/poetry/issues/7611 + run: poetry install --with dev || poetry install --with dev || poetry install --with dev - name: wandb login - run: | - wandb login e2366d661b89f2bee877c40bee15502d67b7abef + run: poetry run wandb login e2366d661b89f2bee877c40bee15502d67b7abef - name: Test with pytest - run: | - pytest test/base test/continuous --cov=tianshou --durations=0 -v --color=yes + run: poetry run poe test-reduced diff --git a/.github/workflows/gputest.yml b/.github/workflows/gputest.yml index 18ff68751..dae6a887f 100644 --- a/.github/workflows/gputest.yml +++ b/.github/workflows/gputest.yml @@ -1,31 +1,55 @@ name: Ubuntu GPU -on: [push, pull_request] +on: + pull_request: + branches: + - master + push: + branches: + - master + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false jobs: gpu: runs-on: [self-hosted, Linux, X64] if: "!contains(github.event.head_commit.message, 'ci skip')" steps: + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} - name: Cancel previous run uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} - uses: actions/checkout@v3 - - name: Set up Python 3.8 + - name: Set up Python 3.11 uses: actions/setup-python@v4 with: - python-version: 3.8 - - name: Upgrade pip + python-version: "3.11" + # use poetry and cache installed packages, see https://github.com/marketplace/actions/python-poetry-action + - name: Install poetry + uses: abatilo/actions-poetry@v2 + - name: Setup a local virtual environment (if no poetry.toml file) run: | - python -m pip install --upgrade pip setuptools wheel - - name: Install dependencies + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + - uses: actions/cache@v3 + name: Define a cache for the virtual environment based on the dependencies lock file + with: + path: ./.venv + key: venv-${{ hashFiles('poetry.lock') }} + - name: Install the project dependencies run: | - python -m pip install ".[dev]" --upgrade + poetry install --with dev --extras "envpool" - name: wandb login run: | - wandb login e2366d661b89f2bee877c40bee15502d67b7abef + poetry run wandb login e2366d661b89f2bee877c40bee15502d67b7abef - name: Test with pytest - # ignore test/throughput which only profiles the code run: | - pytest test --ignore-glob='*profile.py' --cov=tianshou --cov-report=xml --durations=0 -v --color=yes + poetry run poe test diff --git a/.github/workflows/lint_and_docs.yml b/.github/workflows/lint_and_docs.yml index 336ae6880..fce69a8fa 100644 --- a/.github/workflows/lint_and_docs.yml +++ b/.github/workflows/lint_and_docs.yml @@ -1,37 +1,54 @@ name: PEP8, Types and Docs Check -on: [push, pull_request] +on: + pull_request: + branches: + - master + push: + branches: + - master + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false jobs: check: runs-on: ubuntu-latest steps: + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} - name: Cancel previous run uses: styfle/cancel-workflow-action@0.11.0 with: access_token: ${{ github.token }} - uses: 
actions/checkout@v3 - - name: Set up Python 3.8 + - name: Set up Python 3.11 uses: actions/setup-python@v4 with: - python-version: 3.8 - - name: Upgrade pip + python-version: 3.11 + # use poetry and cache installed packages, see https://github.com/marketplace/actions/python-poetry-action + - name: Install poetry + uses: abatilo/actions-poetry@v2 + - name: Setup a local virtual environment (if no poetry.toml file) run: | - python -m pip install --upgrade pip setuptools wheel - - name: Install dependencies - run: | - python -m pip install ".[dev]" --upgrade - - name: Lint with flake8 - run: | - flake8 . --count --show-source --statistics - - name: Code formatter - run: | - yapf -r -d . - isort --check . - - name: Type check - run: | - mypy - - name: Documentation test + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + - uses: actions/cache@v3 + name: Define a cache for the virtual environment based on the dependencies lock file + with: + path: ./.venv + key: venv-${{ hashFiles('poetry.lock') }} + - name: Install the project dependencies run: | - make check-docstyle - make spelling + poetry install --with dev --extras "eval" + - name: Lint + run: poetry run poe lint + - name: Types + run: poetry run poe type-check + - name: Docs + run: poetry run poe doc-build diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml new file mode 100644 index 000000000..1b3b2dceb --- /dev/null +++ b/.github/workflows/publish.yaml @@ -0,0 +1,28 @@ +name: Upload Python Package + +on: + release: + types: [created] + +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v1 + with: + python-version: 3.11 + # use poetry and cache installed packages, see https://github.com/marketplace/actions/python-poetry-action + - name: Install poetry + uses: abatilo/actions-poetry@v2 + - name: Setup a local virtual environment (if no poetry.toml file) + run: | + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + - name: Build and publish + env: + POETRY_PYPI_TOKEN_PYPI: ${{ secrets.PYPI_TOKEN }} + run: | + if [ -z "${POETRY_PYPI_TOKEN_PYPI}" ]; then echo "Set the PYPI_TOKEN variable in your repository secrets"; exit 1; fi + poetry publish --build diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index c08d8ec28..7396e5cd3 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -1,6 +1,19 @@ name: Ubuntu -on: [push, pull_request] +on: + pull_request: + branches: + - master + push: + branches: + - master + workflow_dispatch: + inputs: + debug_enabled: + type: boolean + description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' + required: false + default: false jobs: cpu: @@ -8,8 +21,11 @@ jobs: if: "!contains(github.event.head_commit.message, 'ci skip')" strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: ["3.11"] steps: + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 + if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} - name: Cancel previous run uses: styfle/cancel-workflow-action@0.11.0 with: @@ -19,19 +35,28 @@ jobs: uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - - name: Upgrade pip + # use poetry and cache installed packages, see https://github.com/marketplace/actions/python-poetry-action + - name: Install poetry + uses: 
abatilo/actions-poetry@v2 + - name: Setup a local virtual environment (if no poetry.toml file) run: | - python -m pip install --upgrade pip setuptools wheel - - name: Install dependencies + poetry config virtualenvs.create true --local + poetry config virtualenvs.in-project true --local + - uses: actions/cache@v3 + name: Define a cache for the virtual environment based on the dependencies lock file + with: + path: ./.venv + key: venv-${{ hashFiles('poetry.lock') }} + - name: Install the project dependencies run: | - python -m pip install ".[dev]" --upgrade + poetry install --with dev --extras "envpool eval" - name: wandb login run: | - wandb login e2366d661b89f2bee877c40bee15502d67b7abef + poetry run wandb login e2366d661b89f2bee877c40bee15502d67b7abef - name: Test with pytest # ignore test/throughput which only profiles the code run: | - pytest test --ignore-glob='*profile.py' --ignore="test/3rd_party" --cov=tianshou --cov-report=xml --cov-report=term-missing --durations=0 -v --color=yes + poetry run poe test - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 with: diff --git a/.gitignore b/.gitignore index fd72be398..40ce4a299 100644 --- a/.gitignore +++ b/.gitignore @@ -111,7 +111,7 @@ celerybeat.pid .env .venv venv/ -ENV/ +/ENV/ env.bak/ venv.bak/ @@ -149,3 +149,19 @@ MUJOCO_LOG.TXT *.hdf5 wandb/ videos/ + +# might be needed for IDE plugins that can't read ruff config +.flake8 + +docs/notebooks/_build/ +docs/conf.py + +# temporary scripts (for ad-hoc testing), temp folder +/temp +/temp*.py + +# Serena +/.serena + +# determinism test snapshots +/test/resources/determinism/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 804de4d6f..aa00e7474 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,35 +1,52 @@ ---- +default_install_hook_types: [commit-msg, pre-commit] +default_stages: [commit, manual] +fail_fast: false repos: - # - repo: local - # hooks: - # - id: mypy - # name: mypy - # entry: mypy - # language: python - # pass_filenames: false - # args: [--config-file=setup.cfg, tianshou] - - - repo: https://github.com/google/yapf - rev: v0.32.0 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 hooks: - - id: yapf - args: [-r, -i] - - - repo: https://github.com/pycqa/isort - rev: 5.10.1 + - id: check-added-large-files + - repo: local hooks: - - id: isort - name: isort - - - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 - hooks: - - id: flake8 - args: [--config=setup.cfg, --count, --show-source, --statistics] - additional_dependencies: ["flake8_bugbear"] - - - repo: https://github.com/pycqa/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - exclude: ^(test/)|(docs/)|(examples/)|(setup.py) + - id: ruff + name: ruff + entry: poetry run ruff + require_serial: true + language: system + types: [python] + - id: ruff-nb + name: ruff-nb + entry: poetry run nbqa ruff . 
+ require_serial: true + language: system + pass_filenames: false + types: [python] + - id: black + name: black + entry: poetry run black + require_serial: true + language: system + types: [python] + - id: poetry-check + name: poetry check + entry: poetry check + language: system + files: pyproject.toml + pass_filenames: false + - id: poetry-lock-check + name: poetry lock check + entry: poetry check + args: [--lock] + language: system + pass_filenames: false + - id: mypy + name: mypy + entry: poetry run mypy tianshou examples test + # filenames should not be passed as they would collide with the config in pyproject.toml + pass_filenames: false + files: '^tianshou(/[^/]*)*/[^/]*\.py$' + language: system + - id: mypy-nb + name: mypy-nb + entry: poetry run nbqa mypy + language: system diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..16ce28e2b --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,23 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + commands: + - mkdir -p $READTHEDOCS_OUTPUT/html + - curl -sSL https://install.python-poetry.org | python - +# - ~/.local/bin/poetry config virtualenvs.create false + - ~/.local/bin/poetry install --with dev -E eval +## Same as poe tasks, but unfortunately poe doesn't work with poetry not creating virtualenvs + - ~/.local/bin/poetry run python docs/autogen_rst.py + - ~/.local/bin/poetry run which jupyter-book + - ~/.local/bin/poetry run python docs/create_toc.py + - ~/.local/bin/poetry run jupyter-book config sphinx docs/ + - ~/.local/bin/poetry run sphinx-build -W -b html docs $READTHEDOCS_OUTPUT/html diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..2e4f8b703 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,573 @@ +# Change Log + +## Upcoming Release 2.0.0 + +This major release of Tianshou is a big step towards cleaner design and improved usability. + +Given the large extent of the changes, it was not possible to maintain compatibility with the previous version. + * Persisted agents that were created with earlier versions cannot be loaded in v2. + * Source code from v1 can, however, be migrated to v2 with minimal effort. + See migration information below. For concrete examples, you may use git to diff individual + example scripts with the corresponding ones in `v1.2.0`. + +This release is brought to you by [Applied AI Institute gGmbH](https://www.appliedai-institute.de). + +Developers: + * Dr. Dominik Jain (@opcode81) + * Michael Panchenko (@MischaPanch) + +### Trainer Abstraction + +* The trainer logic and configuration is now properly separated between the three cases of on-policy, off-policy + and offline learning: The base class is no longer a "God" class (formerly `BaseTrainer`) which does it all; logic and functionality has moved + to the respective subclasses (`OnPolicyTrainer`, `OffPolicyTrainer` and `OfflineTrainer`, with `OnlineTrainer` + being introduced as a base class for the two former specialisations). + +* The trainers now use configuration objects with central documentation (which has been greatly improved to enhance + clarity and usability in general); every type of trainer now has a dedicated configuration class which provides + precisely the options that are applicable. 
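+
+As an illustrative sketch (not verbatim from the codebase: the import path, the presence of the collector fields and the exact `run_training` signature are assumptions, and all values are placeholders), configuring an off-policy training run might look roughly like this:
+
+```python
+from tianshou.trainer import OffPolicyTrainerParams  # import path assumed
+
+params = OffPolicyTrainerParams(
+    train_collector=train_collector,  # pre-built collectors (placeholders)
+    test_collector=test_collector,
+    max_epochs=100,
+    epoch_num_steps=10_000,
+    collection_step_num_env_steps=10,
+    test_step_num_episodes=10,
+    update_step_num_gradient_steps_per_sample=0.1,
+    test_in_train=False,
+)
+result = algorithm.run_training(params)  # `algorithm` is an Algorithm instance (see "Algorithms and Policies" below)
+```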
+ +* The interface has been streamlined with improved naming of functions/parameters and limiting the public interface to purely + the methods and attributes a user should reasonably access. + +* Further changes potentially affecting usage: + * We dropped the iterator semantics: Method `__next__` has been replaced by `execute_epoch`. #913 + * We no longer report outdated statistics (e.g. on rewards/returns when a training step does not collect any full + episodes) + * See also "Issues resolved" below (as issue resolution can result in usage changes) + * The default value for `test_in_train` was changed from True to False (updating all usage sites to explicitly + set the parameter), because False is the more natural default, which does not make assumptions about + returns/score values computed for the data from a collection step being at all meaningful for early stopping + * The management of epsilon-greedy exploration for discrete Q-learning algorithms has been simplified: + * All respective Policy implementations (e.g. `DQNPolicy`, `C51Policy`, etc.) now accept two parameters + `eps_training` and `eps_inference`, which allows the training and test collection cases to be sufficiently + differentiated and makes the use of callback functions (`train_fn`, `test_fn`) unnecessary if only + constants are to be set. + * The setter method `set_eps` has been replaced with `set_eps_training` and `set_eps_inference` accordingly. + +* Further internal changes unlikely to affect usage: + * Module `trainer.utils` was removed and the functions therein where moved to class `Trainer` + * The two places that collected and evaluated test episodes (`_test_in_train` and `_reset`) in addition to + `_test_step` were unified to use `_test_step` (with some minor parametrisation) and now log the results + of the test step accordingly. + +* Issues resolved: + * Methods `run` and `reset`: Parameter `reset_prior_to_run` of `run` was never respected if it was set to `False`, + because the implementation of `__iter__` (now removed) would call `reset` regardless - and calling `reset` + is indeed necessary, because it initializes the training. The parameter was removed and replaced by + `reset_collectors` (such that `run` now replicates the parameters of `reset`). + * Inconsistent configuration options now raise exceptions rather than silently ignoring the issue in the + hope that default behaviour will achieve what the user intended. + One condition where `test_in_train` was silently set to `False` was removed and replaced by a warning. + * The stop criterion `stop_fn` did not consider scores as computed by `compute_score_fn` but instead always used + mean returns (i.e. it was assumed that the default implementation of `compute_score_fn` applies). + This is an inconsistency which has been resolved. + * The `gradient_step` counter was flawed (as it made assumptions about the underlying algorithms, which were + not valid). It has been replaced with an update step counter. + Members of `InfoStats` and parameters of `Logger` (and subclasses) were changed accordingly. + +* Migration information at a glance: + * Training parameters are now passed via instances of configuration objects instead of directly as keyword arguments: + `OnPolicyTrainerParams`, `OffPolicyTrainerParams`, `OfflineTrainerParams`. + * Changed parameter default: Default for `test_in_train` was changed from True to False. 
+ * Changed parameter names to improve clarity:
+ * `max_epoch` (`num_epochs` in high-level API) -> `max_epochs`
+ * `step_per_epoch` -> `epoch_num_steps`
+ * `episode_per_test` (`num_test_episodes` in high-level API) -> `test_step_num_episodes`
+ * `step_per_collect` -> `collection_step_num_env_steps`
+ * `episode_per_collect` -> `collection_step_num_episodes`
+ * `update_per_step` -> `update_step_num_gradient_steps_per_sample`
+ * `repeat_per_collect` -> `update_step_num_repetitions`
+ * Trainer classes have been renamed:
+ * `OnpolicyTrainer` -> `OnPolicyTrainer`
+ * `OffpolicyTrainer` -> `OffPolicyTrainer`
+ * Method `run`: The parameter `reset_prior_to_run` was removed and replaced by `reset_collectors` (see above).
+ * Methods `run` and `reset`: The parameter `reset_buffer` was renamed to `reset_collector_buffers` for clarity
+ * Trainers are no longer iterators; manual usage (not using `run`) should simply call `reset` followed by
+ calls of `execute_epoch`.
+
+### Algorithms and Policies
+
+* We now conceptually differentiate between the learning algorithm and the policy being optimised:
+
+ * The abstraction `BasePolicy` is thus replaced by `Algorithm` and `Policy`, and the package was renamed
+ from `tianshou.policy` to `tianshou.algorithm`.
+
+ * Migration information: The instantiation of a policy is replaced by the instantiation of an `Algorithm`,
+ which is passed a `Policy`. In most cases, the former policy class name `<Name>Policy` is replaced by algorithm
+ class `<Name>`; exceptions are noted below.
+
+ * `ImitationPolicy` -> `OffPolicyImitationLearning`, `OfflineImitationLearning`
+ * `PGPolicy` -> `Reinforce`
+ * `MultiAgentPolicyManager` -> `MultiAgentOnPolicyAlgorithm`, `MultiAgentOffPolicyAlgorithm`
+ * `MARLRandomPolicy` -> `MARLRandomDiscreteMaskedOffPolicyAlgorithm`
+
+ For the respective subtype of `Policy` to use, see the respective algorithm class' constructor.
+
+* Interface changes/improvements:
+ * Core methods have been renamed (and removed from the public interface; #898):
+ * `process_fn` -> `_preprocess_batch`
+ * `post_process_fn` -> `_postprocess_batch`
+ * `learn` -> `_update_with_batch`
+ * The updating interface has been cleaned up (#949):
+ * Functions `update` and `_update_with_batch` (formerly `learn`) no longer have `*args` and `**kwargs`.
+ * Instead, the interfaces for the offline, off-policy and on-policy cases are properly differentiated.
+ * New method `run_training`: The `Algorithm` abstraction can now directly initiate the learning process via this method.
+ * `Algorithms` no longer require `torch.optim.Optimizer` instances and instead require `OptimizerFactory`
+ instances, which create the actual optimizers internally. #959
+ The new `OptimizerFactory` abstraction simultaneously handles the creation of learning rate schedulers
+ for the optimizers created (via method `with_lr_scheduler_factory` and accompanying factory abstraction
+ `LRSchedulerFactory`).
+ The parameter `lr_scheduler` has thus been removed from all algorithm constructors.
+ * The flag `updating` has been removed (no internal usage, general usefulness questionable).
+ * Removed `max_action_num`, instead read it off from `action_space`
+ * Parameter changes:
+ * `actor_step_size` -> `trust_region_size` in NPG
+ * `discount_factor` -> `gamma` (was already used internally almost everywhere)
+ * `reward_normalization` -> `return_standardization` or `return_scaling` (more precise naming) or removed (was actually unsupported by Q-learning algorithms)
+ * `return_standardization` in `Reinforce` and `DiscreteCRR` (as it applies standardization of returns)
+ * `return_scaling` in actor-critic on-policy algorithms (A2C, PPO, GAIL, NPG, TRPO)
+ * removed from Q-learning algorithms, where it was actually unsupported (DQN, C51, etc.)
+ * `clip_grad` -> `max_grad_norm` (for consistency)
+ * `clip_loss_grad` -> `huber_loss_delta` (allowing one to control not only the use of the Huber loss but also its essential parameter)
+ * `estimation_step` -> `n_step_return_horizon` (more precise naming)
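+
+To make the migration concrete, the following sketch contrasts the v1 and v2 construction patterns for DQN (network and environment setup are omitted, imports are not shown because module paths may differ, and the factory name `AdamOptimizerFactory` as well as secondary argument names are assumptions rather than verbatim API):
+
+```python
+# v1: update logic and policy were combined in a single BasePolicy subclass
+# policy = DQNPolicy(model=net, optim=torch.optim.Adam(net.parameters(), lr=1e-3), ...)
+
+# v2: the Policy only represents the acting policy; the Algorithm owns the update logic
+policy = DQNPolicy(
+    model=net,                  # Q-network (placeholder)
+    action_space=env.action_space,
+    eps_training=0.1,           # epsilon-greedy exploration during training collection
+    eps_inference=0.0,          # epsilon used when collecting test/inference episodes
+)
+algorithm = DQN(
+    policy=policy,
+    optim=AdamOptimizerFactory(lr=1e-3),  # an OptimizerFactory instead of a torch optimizer
+    gamma=0.99,                           # formerly discount_factor
+    n_step_return_horizon=3,              # formerly estimation_step
+)
+policy.set_eps_training(0.05)  # replaces the former set_eps; set_eps_inference exists analogously
+# training can then be initiated, e.g. via algorithm.run_training(...) as sketched earlier
+```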
+
+* Internal design improvements:
+
+ * Introduced an abstraction for the alpha parameter (coefficient of the entropy term)
+ in `SAC`, `DiscreteSAC` and other algorithms.
+ * Class hierarchy:
+ * Abstract base class `Alpha` with a value property and an update method
+ * `FixedAlpha` for constant entropy coefficients
+ * `AutoAlpha` for automatic entropy tuning (replaces the old tuple-based representation)
+ * The (auto-)updating logic is now completely encapsulated, reducing the complexity of the algorithms.
+ * Implementations for continuous and discrete cases now share the same abstraction,
+ making the codebase more consistent while preserving the original functionality.
+
+ * Introduced a policy base class `ContinuousPolicyWithExplorationNoise` which encapsulates noise generation
+ for continuous action spaces (e.g. relevant to `DDPG`, `SAC` and `REDQ`).
+
+ * Multi-agent RL methods are now differentiated by the type of the sub-algorithms being employed
+ (`MultiAgentOnPolicyAlgorithm`, `MultiAgentOffPolicyAlgorithm`), which renders all interfaces clean.
+ Helper class `MARLDispatcher` has been factored out to manage the dispatching of data to the respective agents.
+
+ * Algorithms now internally use a wrapper (`Algorithm.Optimizer`) around the optimizers; creation is handled
+ by method `_create_optimizer`.
+ * This facilitates backpropagation steps with gradient clipping.
+ * The optimizers of an Algorithm instance are now centrally tracked, such that we can ensure that the
+ optimizers' states are handled alongside the model parameters when calling `state_dict` or `load_state_dict`
+ on the `Algorithm` instance.
+ Special handling of the restoration of optimizers' state dicts was thus removed from examples and tests.
+
+ * Lagged networks (target networks) are now conveniently handled via the new algorithm mixins
+ `LaggedNetworkPolyakUpdateAlgorithmMixin` and `LaggedNetworkFullUpdateAlgorithmMixin`.
+ Using these mixins,
+
+ * a lagged network can simply be added by calling `_add_lagged_network`
+ * the torch method `train` must no longer be overridden to ensure that the target networks
+ are never set to train mode/remain in eval mode (which was prone to errors),
+ * a method which updates all target networks with their source networks is automatically
+ provided and does not need to be implemented specifically for every algorithm
+ (`_update_lagged_network_weights`).
+
+ All classes which make use of lagged networks were updated to use these mixins, simplifying
+ the implementations and reducing the potential for implementation errors.
+ (In the BCQ implementation, the VAE network was not correctly handled, but due to the way
+ in which examples were structured, it did not result in an error.)
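+
+A minimal sketch of how such a mixin might be used in a custom algorithm (only `_add_lagged_network` and `_update_lagged_network_weights` are taken from the description above; the base-class names, import path and constructor details are assumptions):
+
+```python
+from tianshou.algorithm.algorithm_base import (  # module path assumed
+    LaggedNetworkPolyakUpdateAlgorithmMixin,
+    OffPolicyAlgorithm,
+)
+
+class MyOffPolicyAlgorithm(LaggedNetworkPolyakUpdateAlgorithmMixin, OffPolicyAlgorithm):
+    def __init__(self, *, policy) -> None:
+        super().__init__(policy=policy)
+        # register a lagged (target) copy of the critic; the mixin keeps it out of train mode
+        self.critic_old = self._add_lagged_network(policy.critic)
+
+    def _update_with_batch(self, batch):
+        ...  # compute the loss against self.critic_old and take an optimizer step
+        # update all registered lagged networks towards their source networks
+        self._update_lagged_network_weights()
+```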
+
+* Fixed issues in the class hierarchy (particularly critical violations of the Liskov substitution principle):
+ * Introduced base classes (to retain factorization without abusive inheritance):
+ * `ActorCriticOnPolicyAlgorithm`
+ * `ActorCriticOffPolicyAlgorithm`
+ * `ActorDualCriticsOffPolicyAlgorithm` (extends `ActorCriticOffPolicyAlgorithm`)
+ * `QLearningOffPolicyAlgorithm`
+ * `A2C`: Inherit from `ActorCriticOnPolicyAlgorithm` instead of `Reinforce`
+ * `BDQN`:
+ * Inherit from `QLearningOffPolicyAlgorithm` instead of `DQN`
+ * Remove parameter `clip_loss_grad` (unused; only passed on to former base class)
+ * Remove parameter `estimation_step`, for which only one option was valid
+ * `C51`:
+ * Inherit from `QLearningOffPolicyAlgorithm` instead of `DQN`
+ * Remove parameters `clip_loss_grad` and `is_double` (unused; only passed on to former base class)
+ * `CQL`:
+ * Inherit directly from `OfflineAlgorithm` instead of `SAC` (off-policy).
+ * Remove parameter `estimation_step` (now `n_step_return_horizon`), which was not actually used (it was only passed on to its
+ superclass).
+ * `DiscreteBCQ`:
+ * Inherit directly from `OfflineAlgorithm` instead of `DQN`
+ * Remove unused parameters `clip_loss_grad` and `is_double`, which were only passed on to
+ the former base class but actually unused.
+ * `DiscreteCQL`: Remove unused parameters `clip_loss_grad` and `is_double`, which were only passed on to
+ base class `QRDQN` (and unused by it).
+ * `DiscreteCRR`: Inherit directly from `OfflineAlgorithm` instead of `Reinforce` (on-policy)
+ * `FQF`: Remove unused parameters `clip_loss_grad` and `is_double`, which were only passed on to
+ base class `QRDQN` (and unused by it).
+ * `IQN`: Remove unused parameters `clip_loss_grad` and `is_double`, which were only passed on to
+ base class `QRDQN` (and unused by it).
+ * `NPG`: Inherit from `ActorCriticOnPolicyAlgorithm` instead of `A2C`
+ * `QRDQN`:
+ * Inherit from `QLearningOffPolicyAlgorithm` instead of `DQN`
+ * Remove parameters `clip_loss_grad` and `is_double` (unused; only passed on to former base class)
+ * `REDQ`: Inherit from `ActorCriticOffPolicyAlgorithm` instead of `DDPG`
+ * `SAC`: Inherit from `ActorDualCriticsOffPolicyAlgorithm` instead of `DDPG`
+ * `TD3`: Inherit from `ActorDualCriticsOffPolicyAlgorithm` instead of `DDPG`
+
+### High-Level API
+
+* Detailed optimizer configuration (analogous to the procedural API) is now possible:
+ * All optimizers can be configured in the respective algorithm-specific `Params` object by using
+ `OptimizerFactoryFactory` instances as parameter values (e.g. `optim`, `actor_optim`, `critic_optim`, etc.).
+ * Learning rate schedulers remain separate parameters and now use `LRSchedulerFactoryFactory`
+ instances. The respective parameter names now use the suffix `lr_scheduler` instead of `lr_scheduler_factory`
+ (as the precise nature need not be reflected in the name; brevity is preferable).
+
+* `SamplingConfig` is replaced by `TrainingConfig` and subclasses differentiating off-policy and on-policy cases
+ appropriately (`OnPolicyTrainingConfig`, `OffPolicyTrainingConfig`).
+ * The `test_in_train` parameter is now exposed (default False).
+ * Inapplicable arguments can no longer be set in the respective subclass (e.g. `OffPolicyTrainingConfig` does not
+ contain parameter `repeat_per_collect`).
+ * All parameter names have been aligned with the new names used by `TrainerParams` (see above).
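+
+For illustration, the high-level configuration might now look roughly as follows (field values are placeholders; which experiment builder the config is passed to is application-specific and not shown):
+
+```python
+# replaces SamplingConfig for off-policy algorithms; field names follow the new TrainerParams naming
+training_config = OffPolicyTrainingConfig(
+    max_epochs=100,
+    epoch_num_steps=10_000,
+    collection_step_num_env_steps=10,
+    test_step_num_episodes=10,
+    test_in_train=False,  # now exposed; defaults to False
+)
+# the config is passed to an experiment builder where a SamplingConfig was passed before;
+# on-policy-only fields (e.g. the former repeat_per_collect) are no longer available here
+```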
+
+### Peripheral Changes
+
+* The `Actor` classes have been renamed for clarity (#1091):
+ * `BaseActor` -> `Actor`
+ * `continuous.ActorProb` -> `ContinuousActorProbabilistic`
+ * `continuous.Actor` -> `ContinuousActorDeterministic`
+ * `discrete.Actor` -> `DiscreteActor`
+* The `Critic` classes have been renamed for clarity (#1091):
+ * `continuous.Critic` -> `ContinuousCritic`
+ * `discrete.Critic` -> `DiscreteCritic`
+* Moved Atari helper modules `atari_network` and `atari_wrapper` to the library under `tianshou.env.atari`.
+* Fix issues pertaining to the torch device assignment of network components (#810):
+ * Remove 'device' member (and the corresponding constructor argument) from the following classes:
+ `BranchingNet`, `C51Net`, `ContinuousActorDeterministic`, `ContinuousActorProbabilistic`, `ContinuousCritic`,
+ `DiscreteActor`, `DiscreteCritic`, `DQNet`, `FullQuantileFunction`, `ImplicitQuantileNetwork`,
+ `IntrinsicCuriosityModule`, `MLPActor`, `MLP`, `Perturbation`, `QRDQNet`, `Rainbow`, `Recurrent`,
+ `RecurrentActorProb`, `RecurrentCritic`, `VAE`
+ * (Peripheral change:) Require the use of keyword arguments for the constructors of all of these classes
+* Clean up handling of modules that define attribute `output_dim`, introducing the explicit base class
+ `ModuleWithVectorOutput`
+ * Interfaces where one could specify either a module with `output_dim` or additionally provide the output
+ dimension as an argument were changed to use `ModuleWithVectorOutput`.
+ * The high-level API class `IntermediateModule` can now provide a `ModuleWithVectorOutput` instance
+ (via adaptation if necessary).
+* The class hierarchy of supporting `nn.Module` implementations was cleaned up (#1091):
+ * With the fundamental base classes `ActionReprNet` and `ActionReprNetWithVectorOutput`, we established a
+ well-defined interface for the most commonly used `forward` interface in Tianshou's algorithms & policies. #948
+ * Some network classes were renamed:
+ * `ScaledObsInputModule` -> `ScaledObsInputActionReprNet`
+ * `Rainbow` -> `RainbowNet`
+* All modules containing base classes were renamed from `base` to a more descriptive name, rendering
+ file names unique.
+
+## Release 1.2.0
+
+### Changes/Improvements
+
+- `trainer`:
+ - Custom scoring now supported for selecting the best model. #1202
+- `highlevel`:
+ - `DiscreteSACExperimentBuilder`: Expose method `with_actor_factory_default` #1248 #1250
+ - `ActorFactoryDefault`: Fix parameters for hidden sizes and activation not being
+ passed on in the discrete case (affects `with_actor_factory_default` method of experiment builders)
+ - `ExperimentConfig`: Do not inherit from other classes, as this breaks automatic handling by
+ `jsonargparse` when the class is used to define interfaces (as in high-level API examples)
+ - `AutoAlphaFactoryDefault`: Differentiate discrete and continuous action spaces
+ and allow coefficient to be modified, adding an informative docstring
+ (previous implementation was reasonable only for continuous action spaces)
+ - Adjust usage in `atari_sac_hl` example accordingly.
+ - `NPGAgentFactory`, `TRPOAgentFactory`: Fix optimizer instantiation including the actor parameters + (which was misleadingly suggested in the docstring in the respective policy classes; docstrings were fixed), + as the actor parameters are intended to be handled via natural gradients internally +- `data`: + - `ReplayBuffer`: Fix collection of empty episodes being disallowed + - Collection was slow due to `isinstance` checks on Protocols and due to Buffer integrity validation. This was solved + by no longer performing `isinstance` on Protocols and by making the integrity validation disabled by default. +- Tests: + - We have introduced extensive **determinism tests** which allow to validate whether + training processes deterministically compute the same results across different development branches. + This is an important step towards ensuring reproducibility and consistency, which will be + instrumental in supporting Tianshou developers in their work, especially in the context of + algorithm development and evaluation. + +### Breaking Changes + +- `trainer`: + - `BaseTrainer.run` and `__iter__`: Resetting was never optional prior to running the trainer, + yet the recently introduced parameter `reset_prior_to_run` of `run` suggested that it _was_ optional. + Yet the parameter was ultimately not respected, because `__iter__` would always call `reset(reset_collectors=True, reset_buffer=False)` + regardless. The parameter was removed; instead, the parameters of `run` now mirror the parameters of `reset`, + and the implicit `reset` call in `__iter__` was removed. + This aligns with upcoming changes in Tianshou v2.0.0. + * NOTE: If you have been using a trainer without calling `run` but by directly iterating over it, you + will need to call `reset` on the trainer explicitly before iterating over the trainer. + * Using a trainer as an iterator is considered deprecated and support for this will be removed in Tianshou v2.0.0. +- `data`: + - `InfoStats` has a new non-optional field `best_score` which is used + for selecting the best model. #1202 +- `highlevel`: + - Change the way in which seeding is handled: The mechanism introduced in v1.1.0 + was completely revised: + - The `train_seed` and `test_seed` attributes were removed from `SamplingConfig`. + Instead, the seeds are derived from the seed defined in `ExperimentConfig`. + - Seed attributes of `EnvFactory` classes were removed. + Instead, seeds are passed to methods of `EnvFactory`. + +## Release 1.1.0 + +**NOTE**: This release introduced (potentially severe) performance regressions in data collection, please switch to a newer release for better performance. + +### Highlights + +#### Evaluation Package + +This release introduces a new package `evaluation` that integrates best +practices for running experiments (seeding test and train environmets) and for +evaluating them using the [rliable](https://github.com/google-research/rliable) +library. This should be especially useful for algorithm developers for comparing +performances and creating meaningful visualizations. **This functionality is +currently in alpha state** and will be further improved in the next releases. +You will need to install tianshou with the extra `eval` to use it. + +The creation of multiple experiments with varying random seeds has been greatly +facilitated. Moreover, the `ExpLauncher` interface has been introduced and +implemented with several backends to support the execution of multiple +experiments in parallel. 
+ +An example for this using the high-level interfaces can be found +[here](examples/mujoco/mujoco_ppo_hl_multi.py), examples that use low-level +interfaces will follow soon. + +#### Improvements in Batch + +Apart from that, several important +extensions have been added to internal data structures, most notably to `Batch`. +Batches now implement `__eq__` and can be meaningfully compared. Applying +operations in a nested fashion has been significantly simplified, and checking +for NaNs and dropping them is now possible. + +One more notable change is that torch `Distribution` objects are now sliced when +slicing a batch. Previously, when a Batch with say 10 actions and a dist +corresponding to them was sliced to `[:3]`, the `dist` in the result would still +correspond to all 10 actions. Now, the dist is also "sliced" to be the +distribution of the first 3 actions. + +A detailed list of changes can be found below. + +### Changes/Improvements + +- `evaluation`: New package for repeating the same experiment with multiple + seeds and aggregating the results. #1074 #1141 #1183 +- `data`: + - `Batch`: + - Add methods `to_dict` and `to_list_of_dicts`. #1063 #1098 + - Add methods `to_numpy_` and `to_torch_`. #1098, #1117 + - Add `__eq__` (semantic equality check). #1098 + - `keys()` deprecated in favor of `get_keys()` (needed to make iteration + consistent with naming) #1105. + - Major: new methods for applying functions to values, to check for NaNs + and drop them, and to set values. #1181 + - Slicing a batch with a torch distribution now also slices the + distribution. #1181 + - `data.collector`: + - `Collector`: + - Introduced `BaseCollector` as a base class for all collectors. + #1123 + - Add method `close` #1063 + - Method `reset` is now more granular (new flags controlling + behavior). #1063 + - `CollectStats`: Add convenience + constructor `with_autogenerated_stats`. #1063 +- `trainer`: + - Trainers can now control whether collectors should be reset prior to + training. #1063 +- `policy`: + - introduced attribute `in_training_step` that is controlled by the trainer. + #1123 + - policy automatically set to `eval` mode when collecting and to `train` + mode when updating. #1123 + - Extended interface of `compute_action` to also support array-like inputs + #1169 +- `highlevel`: + - `SamplingConfig`: + - Add support for `batch_size=None`. #1077 + - Add `training_seed` for explicit seeding of training and test + environments, the `test_seed` is inferred from `training_seed`. #1074 + - `experiment`: + - `Experiment` now has a `name` attribute, which can be set + using `ExperimentBuilder.with_name` and + which determines the default run name and therefore the persistence + subdirectory. + It can still be overridden in `Experiment.run()`, the new parameter + name being `run_name` rather than + `experiment_name` (although the latter will still be interpreted + correctly). #1074 #1131 + - Add class `ExperimentCollection` for the convenient execution of + multiple experiment runs #1131 + - The `World` object, containing all low-level objects needed for + experimentation, + can now be extracted from an `Experiment` instance. This enables + customizing + the experiment prior to its execution, bridging the low and high-level + interfaces. 
#1187 + - `ExperimentBuilder`: + - Add method `build_seeded_collection` for the sound creation of + multiple + experiments with varying random seeds #1131 + - Add method `copy` to facilitate the creation of multiple + experiments from a single builder #1131 + - `env`: + - Added new `VectorEnvType` called `SUBPROC_SHARED_MEM_AUTO` and used in + for Atari and Mujoco venv creation. #1141 +- `utils`: + - `logger`: + - Loggers can now restore the logged data into python by using the + new `restore_logged_data` method. #1074 + - Wandb logger extended #1183 + - `net.continuous.Critic`: + - Add flag `apply_preprocess_net_to_obs_only` to allow the + preprocessing network to be applied to the observations only (without + the actions concatenated), which is essential for the case where we + want + to reuse the actor's preprocessing network #1128 + - `torch_utils` (new module) + - Added context managers `torch_train_mode` + and `policy_within_training_step` #1123 + - `print` + - `DataclassPPrintMixin` now supports outputting a string, not just + printing the pretty repr. #1141 + +### Fixes + +- `highlevel`: + - `CriticFactoryReuseActor`: Enable the Critic + flag `apply_preprocess_net_to_obs_only` for continuous critics, + fixing the case where we want to reuse an actor's preprocessing network + for the critic (affects usages + of the experiment builder method `with_critic_factory_use_actor` with + continuous environments) #1128 + - Policy parameter `action_scaling` value `"default"` was not correctly + transformed to a Boolean value for + algorithms SAC, DDPG, TD3 and REDQ. The value `"default"` being truthy + caused action scaling to be enabled + even for discrete action spaces. #1191 +- `atari_network.DQN`: + - Fix constructor input validation #1128 + - Fix `output_dim` not being set if `features_only`=True + and `output_dim_added_layer` is not None #1128 +- `PPOPolicy`: + - Fix `max_batchsize` not being used in `logp_old` computation + inside `process_fn` #1168 +- Fix `Batch.__eq__` to allow comparing Batches with scalar array values #1185 + +### Internal Improvements + +- `Collector`s rely less on state, the few stateful things are stored explicitly + instead of through a `.data` attribute. #1063 +- Introduced a first iteration of a naming convention for vars in `Collector`s. + #1063 +- Generally improved readability of Collector code and associated tests (still + quite some way to go). #1063 +- Improved typing for `exploration_noise` and within Collector. #1063 +- Better variable names related to model outputs (logits, dist input etc.). + #1032 +- Improved typing for actors and critics, using Tianshou classes + like `Actor`, `ActorProb`, etc., + instead of just `nn.Module`. #1032 +- Added interfaces for most `Actor` and `Critic` classes to enforce the presence + of `forward` methods. #1032 +- Simplified `PGPolicy` forward by unifying the `dist_fn` interface (see + associated breaking change). #1032 +- Use `.mode` of distribution instead of relying on knowledge of the + distribution type. #1032 +- Exception no longer raised on `len` of empty `Batch`. #1084 +- tests and examples are covered by `mypy`. #1077 +- `Actor` is more used, stricter typing by making it generic. #1077 +- Use explicit multiprocessing context for creating `Pipe` in `subproc.py`. + #1102 + +### Breaking Changes + +- `data`: + - `Collector`: + - Removed `.data` attribute. #1063 + - Collectors no longer reset the environment on initialization. 
+ Instead, the user might have to call `reset` explicitly or
+ pass `reset_before_collect=True`. #1063
+ - Removed `no_grad` argument from `collect` method (was unused in
+ tianshou). #1123
+ - `Batch`:
+ - Fixed `iter(Batch(...))` which now behaves the same way
+ as `Batch(...).__iter__()`.
+ Can be considered a bugfix. #1063
+ - The methods `to_numpy` and `to_torch` are not in-place anymore
+ (use `to_numpy_` or `to_torch_` instead). #1098, #1117
+ - The method `Batch.is_empty` has been removed. Instead, the user can
+ simply check for emptiness of Batch by using `len` on dicts. #1144
+ - Stricter `cat_`, only concatenation of batches with the same structure
+ is allowed. #1181
+ - `to_torch` and `to_numpy` are no longer static methods.
+ So `Batch.to_numpy(batch)` should be replaced by `batch.to_numpy()`.
+ #1200
+- `utils`:
+ - `logger`:
+ - `BaseLogger.prepare_dict_for_logging` is now abstract. #1074
+ - Removed deprecated and unused `BasicLogger` (only affects users who
+ subclassed it). #1074
+ - `utils.net`:
+ - `Recurrent` now receives and returns
+ a `RecurrentStateBatch` instead of a dict. #1077
+ - Modules with code that was copied from sensAI have been replaced by
+ imports from new dependency sensAI-utils:
+ - `tianshou.utils.logging` is replaced with `sensai.util.logging`
+ - `tianshou.utils.string` is replaced with `sensai.util.string`
+ - `tianshou.utils.pickle` is replaced with `sensai.util.pickle`
+- `env`:
+ - All VectorEnvs now return a numpy array of info-dicts on reset instead of
+ a list. #1063
+- `policy`:
+ - Changed interface of `dist_fn` in `PGPolicy` and all subclasses to take a
+ single argument in both
+ continuous and discrete cases. #1032
+- `AtariEnvFactory` constructor (in examples, so not really breaking) now
+ requires explicit train and test seeds. #1074
+- `EnvFactoryRegistered` now requires an explicit `test_seed` in the
+ constructor. #1074
+- `highlevel`:
+ - `params`: The parameter `dist_fn` has been removed from the parameter
+ objects (`PGParams`, `A2CParams`, `PPOParams`, `NPGParams`, `TRPOParams`).
+ The correct distribution is now determined automatically based on the
+ actor factory being used, avoiding the possibility of
+ misspecification. Persisted configurations/policies continue to work as
+ expected, but code must not specify the `dist_fn` parameter.
+ #1194 #1195
+ - `env`:
+ - `EnvFactoryRegistered`: parameter `seed` has been replaced by the pair
+ of parameters `train_seed` and `test_seed`.
+ Persisted instances will continue to work correctly.
+ Subclasses such as `AtariEnvFactory` are also affected and require
+ explicit train and test seeds. #1074
+ - `VectorEnvType`: `SUBPROC_SHARED_MEM` has been replaced
+ by `SUBPROC_SHARED_MEM_DEFAULT`. It is recommended to
+ use `SUBPROC_SHARED_MEM_AUTO` instead. However, persisted configs will
+ continue working. #1141
+
+### Tests
+
+- Fixed env seeding in `test_sac_with_il.py` so that the test doesn't fail
+ randomly. #1081
+- Improved CI triggers and added telemetry (if requested by user) #1177
+- Improved environment used in tests.
+- Improved batch equality tests to check with scalar values #1185
+
+### Dependencies
+
+- [DeepDiff](https://github.com/seperman/deepdiff) added to help with diffs of
+ batches in tests.
#1098 +- Bumped black, idna, pillow +- New extra "eval" +- Bumped numba to >=60.0.0, permitting installation on python 3.12 # 1177 +- New dependency sensai-utils + +Started after v1.0.0 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4e3827b26 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# Use the official Python image for the base image. +FROM --platform=linux/amd64 python:3.11-slim + +# Set environment variables to make Python print directly to the terminal and avoid .pyc files. +ENV PYTHONUNBUFFERED=1 +ENV PYTHONDONTWRITEBYTECODE=1 + +# Install system dependencies required for the project. +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + build-essential \ + git \ + wget \ + unzip \ + libvips-dev \ + gnupg2 \ + && rm -rf /var/lib/apt/lists/* + + +# Install pipx. +RUN python3 -m pip install --no-cache-dir pipx \ + && pipx ensurepath + +# Add poetry to the path +ENV PATH="${PATH}:/root/.local/bin" + +# Install the latest version of Poetry using pipx. +RUN pipx install poetry + +# Set the working directory. IMPORTANT: can't be changed as needs to be in sync to the dir where the project is cloned +# to in the codespace +WORKDIR /workspaces/tianshou + +# Copy the pyproject.toml and poetry.lock files (if available) into the image. +COPY pyproject.toml poetry.lock* README.md /workspaces/tianshou/ + +RUN poetry config virtualenvs.create false +RUN poetry install --no-root --with dev + +# The entrypoint will perform an editable install, it is expected that the code is mounted in the container then +# If you don't want to mount the code, you should override the entrypoint +ENTRYPOINT ["/bin/bash", "-c", "poetry install --with dev && poetry run jupyter trust notebooks/*.ipynb docs/02_notebooks/*.ipynb && $0 $@"] \ No newline at end of file diff --git a/Makefile b/Makefile deleted file mode 100644 index e12185604..000000000 --- a/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -SHELL=/bin/bash -PROJECT_NAME=tianshou -PROJECT_PATH=${PROJECT_NAME}/ -PYTHON_FILES = $(shell find setup.py ${PROJECT_NAME} test docs/conf.py examples -type f -name "*.py") - -check_install = python3 -c "import $(1)" || pip3 install $(1) --upgrade -check_install_extra = python3 -c "import $(1)" || pip3 install $(2) --upgrade - -pytest: - $(call check_install, pytest) - $(call check_install, pytest_cov) - $(call check_install, pytest_xdist) - pytest test --cov ${PROJECT_PATH} --durations 0 -v --cov-report term-missing --color=yes - -mypy: - $(call check_install, mypy) - mypy ${PROJECT_NAME} - -lint: - $(call check_install, flake8) - $(call check_install_extra, bugbear, flake8_bugbear) - flake8 ${PYTHON_FILES} --count --show-source --statistics - -format: - $(call check_install, isort) - isort ${PYTHON_FILES} - $(call check_install, yapf) - yapf -ir ${PYTHON_FILES} - -check-codestyle: - $(call check_install, isort) - $(call check_install, yapf) - isort --check ${PYTHON_FILES} && yapf -r -d ${PYTHON_FILES} - -check-docstyle: - $(call check_install, pydocstyle) - $(call check_install, doc8) - $(call check_install, sphinx) - $(call check_install, sphinx_rtd_theme) - $(call check_install, sphinxcontrib.bibtex, sphinxcontrib_bibtex) - pydocstyle ${PROJECT_PATH} && doc8 docs && cd docs && make html SPHINXOPTS="-W" - -doc: - $(call check_install, sphinx) - $(call check_install, sphinx_rtd_theme) - $(call check_install, sphinxcontrib.bibtex, sphinxcontrib_bibtex) - cd docs && make html && cd _build/html && python3 -m http.server - -spelling: - $(call check_install, sphinx) - $(call 
check_install, sphinx_rtd_theme) - $(call check_install_extra, sphinxcontrib.spelling, sphinxcontrib.spelling pyenchant) - $(call check_install, sphinxcontrib.bibtex, sphinxcontrib_bibtex) - cd docs && make spelling SPHINXOPTS="-W" - -doc-clean: - cd docs && make clean - -clean: doc-clean - -commit-checks: lint check-codestyle mypy check-docstyle spelling - -.PHONY: clean spelling doc mypy lint format check-codestyle check-docstyle commit-checks diff --git a/README.md b/README.md index d6a9300b4..0b3ed1f3a 100644 --- a/README.md +++ b/README.md @@ -4,16 +4,35 @@ --- -[![PyPI](https://img.shields.io/pypi/v/tianshou)](https://pypi.org/project/tianshou/) [![Conda](https://img.shields.io/conda/vn/conda-forge/tianshou)](https://github.com/conda-forge/tianshou-feedstock) [![Read the Docs](https://img.shields.io/readthedocs/tianshou)](https://tianshou.readthedocs.io/en/master) [![Read the Docs](https://img.shields.io/readthedocs/tianshou-docs-zh-cn?label=%E4%B8%AD%E6%96%87%E6%96%87%E6%A1%A3)](https://tianshou.readthedocs.io/zh/master/) [![Unittest](https://github.com/thu-ml/tianshou/workflows/Unittest/badge.svg?branch=master)](https://github.com/thu-ml/tianshou/actions) [![codecov](https://img.shields.io/codecov/c/gh/thu-ml/tianshou)](https://codecov.io/gh/thu-ml/tianshou) [![GitHub issues](https://img.shields.io/github/issues/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/issues) [![GitHub stars](https://img.shields.io/github/stars/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/stargazers) [![GitHub forks](https://img.shields.io/github/forks/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/network) [![GitHub license](https://img.shields.io/github/license/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/blob/master/LICENSE) +[![PyPI](https://img.shields.io/pypi/v/tianshou)](https://pypi.org/project/tianshou/) [![Conda](https://img.shields.io/conda/vn/conda-forge/tianshou)](https://github.com/conda-forge/tianshou-feedstock) [![Read the Docs](https://readthedocs.org/projects/tianshou/badge/?version=master)](https://tianshou.org/en/master/) [![Pytest](https://github.com/thu-ml/tianshou/actions/workflows/pytest.yml/badge.svg)](https://github.com/thu-ml/tianshou/actions) [![codecov](https://img.shields.io/codecov/c/gh/thu-ml/tianshou)](https://codecov.io/gh/thu-ml/tianshou) [![GitHub issues](https://img.shields.io/github/issues/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/issues) [![GitHub stars](https://img.shields.io/github/stars/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/stargazers) [![GitHub forks](https://img.shields.io/github/forks/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/network) [![GitHub license](https://img.shields.io/github/license/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/blob/master/LICENSE) -> ⚠️️ **Transition to Gymnasium**: The maintainers of OpenAI Gym have recently released [Gymnasium](http://github.com/Farama-Foundation/Gymnasium), -> which is where future maintenance of OpenAI Gym will be taking place. -> Tianshou has transitioned to internally using Gymnasium environments. You can still use OpenAI Gym environments with -> Tianshou vector environments, but they will be wrapped in a compatibility layer, which could be a source of issues. -> We recommend that you update your environment code to Gymnasium. If you want to continue using OpenAI Gym with -> Tianshou, you need to manually install Gym and [Shimmy](https://github.com/Farama-Foundation/Shimmy) (the compatibility layer). 
+> ℹ️ **Introducing Tianshou version 2** +> +> We have just released the first beta version 2.0.0b1 of the new major version of Tianshou, and we invite you to try it! +> Version 2 is a complete overhaul of the software design of the procedural API, in which +> * we establish a clear separation between learning algorithms and policies (via the separate abstractions `Algorithm` and `Policy`). +> * we provide more well-defined, more usable interfaces with extensive documentation of all algorithm and trainer parameters, +> renaming some parameters to make their names more consistent and intuitive. +> * the class hierarchy is fully revised, establishing a clear separation between on-policy, off-policy and offline algorithms +> at the type level and ensuring that all inheritance relationships are meaningful. +> +> Because of the extent of the changes, this version is not backwards compatible with previous versions of Tianshou. +> For migration information, please see the [change log](CHANGELOG.md). -**Tianshou** ([天授](https://baike.baidu.com/item/%E5%A4%A9%E6%8E%88)) is a reinforcement learning platform based on pure PyTorch. Unlike existing reinforcement learning libraries, which are mainly based on TensorFlow, have many nested classes, unfriendly API, or slow-speed, Tianshou provides a fast-speed modularized framework and pythonic API for building the deep reinforcement learning agent with the least number of lines of code. The supported interface algorithms currently include: +**Tianshou** ([天授](https://baike.baidu.com/item/%E5%A4%A9%E6%8E%88)) is a reinforcement learning (RL) library based on pure PyTorch and [Gymnasium](http://github.com/Farama-Foundation/Gymnasium). Tianshou's main features at a glance are: + +1. Modular low-level interfaces for algorithm developers (RL researchers) that are both flexible, hackable and type-safe. +1. Convenient high-level interfaces for applications of RL (training an implemented algorithm on a custom environment). +1. Large scope: online (on- and off-policy) and offline RL, experimental support for multi-agent RL (MARL), experimental support for model-based RL, and more + +Unlike other reinforcement learning libraries, which may have complex codebases, +unfriendly high-level APIs, or are not optimized for speed, Tianshou provides a high-performance, modularized framework +and user-friendly interfaces for building deep reinforcement learning agents. One more aspect that sets Tianshou apart is its +generality: it supports online and offline RL, multi-agent RL, and model-based algorithms. + +Tianshou aims at enabling concise implementations, both for researchers and practitioners, without sacrificing flexibility. 
+ +Supported algorithms include: - [Deep Q-Network (DQN)](https://storage.googleapis.com/deepmind-media/dqn/DQNNaturePaper.pdf) - [Double DQN](https://arxiv.org/pdf/1509.06461.pdf) @@ -34,7 +53,7 @@ - [Soft Actor-Critic (SAC)](https://arxiv.org/pdf/1812.05905.pdf) - [Randomized Ensembled Double Q-Learning (REDQ)](https://arxiv.org/pdf/2101.05982.pdf) - [Discrete Soft Actor-Critic (SAC-Discrete)](https://arxiv.org/pdf/1910.07207.pdf) -- Vanilla Imitation Learning +- [Vanilla Imitation Learning](https://en.wikipedia.org/wiki/Apprenticeship_learning) - [Batch-Constrained deep Q-Learning (BCQ)](https://arxiv.org/pdf/1812.02900.pdf) - [Conservative Q-Learning (CQL)](https://arxiv.org/pdf/2006.04779.pdf) - [Twin Delayed DDPG with Behavior Cloning (TD3+BC)](https://arxiv.org/pdf/2106.06860.pdf) @@ -48,210 +67,390 @@ - [Intrinsic Curiosity Module (ICM)](https://arxiv.org/pdf/1705.05363.pdf) - [Hindsight Experience Replay (HER)](https://arxiv.org/pdf/1707.01495.pdf) -Here are Tianshou's other features: - -- Elegant framework, using only ~4000 lines of code -- State-of-the-art [MuJoCo benchmark](https://github.com/thu-ml/tianshou/tree/master/examples/mujoco) for REINFORCE/A2C/TRPO/PPO/DDPG/TD3/SAC algorithms -- Support vectorized environment (synchronous or asynchronous) for all algorithms [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#parallel-sampling) -- Support super-fast vectorized environment [EnvPool](https://github.com/sail-sg/envpool/) for all algorithms [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#envpool-integration) -- Support recurrent state representation in actor network and critic network (RNN-style training for POMDP) [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#rnn-style-training) -- Support any type of environment state/action (e.g. a dict, a self-defined class, ...) [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#user-defined-environment-and-different-state-representation) -- Support customized training process [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#customize-training-process) -- Support n-step returns estimation and prioritized experience replay for all Q-learning based algorithms; GAE, nstep and PER are very fast thanks to numba jit function and vectorized numpy operation -- Support multi-agent RL [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#multi-agent-reinforcement-learning) -- Support both [TensorBoard](https://www.tensorflow.org/tensorboard) and [W&B](https://wandb.ai/) log tools -- Support multi-GPU training [Usage](https://tianshou.readthedocs.io/en/master/tutorials/cheatsheet.html#multi-gpu) +Other noteworthy features: + +- Elegant framework with dual APIs: + - Tianshou's high-level API maximizes ease of use for application development while still retaining a high degree + of flexibility. + - The fundamental procedural API provides a maximum of flexibility for algorithm development without being + overly verbose. 
+- State-of-the-art results in [MuJoCo benchmarks](https://github.com/thu-ml/tianshou/tree/master/examples/mujoco) for REINFORCE/A2C/TRPO/PPO/DDPG/TD3/SAC algorithms +- Support for vectorized environments (synchronous or asynchronous) for all algorithms (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#parallel-sampling)) +- Support for super-fast vectorized environments based on [EnvPool](https://github.com/sail-sg/envpool/) for all algorithms (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#envpool-integration)) +- Support for recurrent state representations in actor networks and critic networks (RNN-style training for POMDPs) (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#rnn-style-training)) +- Support for any type of environment state/action (e.g. a dict, a self-defined class, ...) (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#user-defined-environment-and-different-state-representation)) +- Support for customized training processes (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#customize-training-process)) +- Support for n-step return estimation and prioritized experience replay for all Q-learning based algorithms; GAE, n-step and PER are highly optimized thanks to numba's just-in-time compilation and vectorized numpy operations +- Support for multi-agent RL (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#multi-agent-reinforcement-learning)) +- Support for logging based on both [TensorBoard](https://www.tensorflow.org/tensorboard) and [W&B](https://wandb.ai/) +- Support for multi-GPU training (see [usage](https://tianshou.readthedocs.io/en/master/01_tutorials/07_cheatsheet.html#multi-gpu)) - Comprehensive documentation, PEP8 code-style checking, type checking and thorough [tests](https://github.com/thu-ml/tianshou/actions) -In Chinese, Tianshou means divinely ordained and is derived to the gift of being born with. Tianshou is a reinforcement learning platform, and the RL algorithm does not learn from humans. So taking "Tianshou" means that there is no teacher to study with, but rather to learn by themselves through constant interaction with the environment. +In Chinese, Tianshou means divinely ordained, derived from the gift one is born with. +Tianshou is a reinforcement learning platform, and the nature of RL is not to learn from humans. +So the name "Tianshou" signifies that there is no teacher to learn from; rather, the agent learns by itself through constant interaction with the environment. “天授”意指上天所授,引申为与生具有的天赋。天授是强化学习平台,而强化学习算法并不是向人类学习的,所以取“天授”意思是没有老师来教,而是自己通过跟环境不断交互来进行学习。 ## Installation -Tianshou is currently hosted on [PyPI](https://pypi.org/project/tianshou/) and [conda-forge](https://github.com/conda-forge/tianshou-feedstock). It requires Python >= 3.6. +Tianshou is currently hosted on [PyPI](https://pypi.org/project/tianshou/) and [conda-forge](https://github.com/conda-forge/tianshou-feedstock). It requires Python >= 3.11.
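+If you are unsure whether your interpreter meets this requirement, a quick check (standard library only, nothing Tianshou-specific) is:
+
+```python
+import sys
+
+# Tianshou requires Python 3.11 or newer
+assert sys.version_info >= (3, 11), f"Python >= 3.11 required, found {sys.version}"
+```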
+ +For installing the most recent version of Tianshou, the best way is to clone the repository and install it with [poetry](https://python-poetry.org/) +(which you need to install on your system first): + +```bash +git clone git@github.com:thu-ml/tianshou.git +cd tianshou +poetry install +``` + +You can also install the dev requirements by adding `--with dev`, or the extras +for, say, MuJoCo and acceleration by [envpool](https://github.com/sail-sg/envpool) +by adding `--extras "mujoco envpool"`. + +If you wish to install multiple extras, ensure that you include them in a single command. Sequential calls to `poetry install --extras xxx` will overwrite prior installations, leaving only the last specified extras installed. +Alternatively, you may install all of the following extras by adding `--all-extras`. -You can simply install Tianshou from PyPI with the following command: +Available extras are: + +- `atari` (for Atari environments) +- `box2d` (for Box2D environments) +- `classic_control` (for classic control (discrete) environments) +- `mujoco` (for MuJoCo environments) +- `mujoco-py` (for legacy mujoco-py environments[^1]) +- `pybullet` (for pybullet environments) +- `robotics` (for gymnasium-robotics environments) +- `vizdoom` (for ViZDoom environments) +- `envpool` (for [envpool](https://github.com/sail-sg/envpool/) integration) +- `argparse` (in order to be able to run the high-level API examples) + +[^1]: + `mujoco-py` is a legacy package and is not recommended for new projects. + It is only included for compatibility with older projects. + Also note that there may be compatibility issues with macOS newer than + Monterey. + +Otherwise, you can install the latest release from PyPI (currently +far behind the master) with the following command: ```bash $ pip install tianshou ``` -If you use Anaconda or Miniconda, you can install Tianshou from conda-forge through the following command: +If you are using Anaconda or Miniconda, you can install Tianshou from conda-forge: ```bash -$ conda install -c conda-forge tianshou +$ conda install tianshou -c conda-forge ``` -You can also install with the newest version through GitHub: +As an alternative to the poetry installation, you can also install the latest source version through GitHub: ```bash $ pip install git+https://github.com/thu-ml/tianshou.git@master --upgrade ``` -After installation, open your python console and type +Finally, you may check the installation via your Python console as follows: ```python import tianshou print(tianshou.__version__) ``` -If no error occurs, you have successfully installed Tianshou. +If no errors are reported, you have successfully installed Tianshou. ## Documentation -The tutorials and API documentation are hosted on [tianshou.readthedocs.io](https://tianshou.readthedocs.io/). - -The example scripts are under [test/](https://github.com/thu-ml/tianshou/blob/master/test) folder and [examples/](https://github.com/thu-ml/tianshou/blob/master/examples) folder. - -中文文档位于 [https://tianshou.readthedocs.io/zh/master/](https://tianshou.readthedocs.io/zh/master/)。 +Find example scripts in the [test/](https://github.com/thu-ml/tianshou/blob/master/test) and [examples/](https://github.com/thu-ml/tianshou/blob/master/examples) folders. - +Tutorials and API documentation are hosted on [tianshou.readthedocs.io](https://tianshou.readthedocs.io/). +**Important**: The documentation is currently being updated to reflect the changes in Tianshou v2.0.0. Not all features are documented yet, and some parts are outdated (they are marked as such).
The documentation will be fully updated when +the v2.0.0 release is finalized. ## Why Tianshou? ### Comprehensive Functionality -| RL Platform | GitHub Stars | # of Alg. (1) | Custom Env | Batch Training | RNN Support | Nested Observation | Backend | -| ------------------------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |--------------------------------| --------------------------------- | ------------------ | ------------------ | ---------- | -| [Baselines](https://github.com/openai/baselines) | [![GitHub stars](https://img.shields.io/github/stars/openai/baselines)](https://github.com/openai/baselines/stargazers) | 9 | :heavy_check_mark: (gym) | :heavy_minus_sign: (2) | :heavy_check_mark: | :x: | TF1 | -| [Stable-Baselines](https://github.com/hill-a/stable-baselines) | [![GitHub stars](https://img.shields.io/github/stars/hill-a/stable-baselines)](https://github.com/hill-a/stable-baselines/stargazers) | 11 | :heavy_check_mark: (gym) | :heavy_minus_sign: (2) | :heavy_check_mark: | :x: | TF1 | -| [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) | [![GitHub stars](https://img.shields.io/github/stars/DLR-RM/stable-baselines3)](https://github.com/DLR-RM/stable-baselines3/stargazers) | 7 (3) | :heavy_check_mark: (gym) | :heavy_minus_sign: (2) | :x: | :heavy_check_mark: | PyTorch | -| [Ray/RLlib](https://github.com/ray-project/ray/tree/master/rllib/) | [![GitHub stars](https://img.shields.io/github/stars/ray-project/ray)](https://github.com/ray-project/ray/stargazers) | 16 | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | TF/PyTorch | -| [SpinningUp](https://github.com/openai/spinningup) | [![GitHub stars](https://img.shields.io/github/stars/openai/spinningup)](https://github.com/openai/spinningupstargazers) | 6 | :heavy_check_mark: (gym) | :heavy_minus_sign: (2) | :x: | :x: | PyTorch | -| [Dopamine](https://github.com/google/dopamine) | [![GitHub stars](https://img.shields.io/github/stars/google/dopamine)](https://github.com/google/dopamine/stargazers) | 7 | :x: | :x: | :x: | :x: | TF/JAX | -| [ACME](https://github.com/deepmind/acme) | [![GitHub stars](https://img.shields.io/github/stars/deepmind/acme)](https://github.com/deepmind/acme/stargazers) | 14 | :heavy_check_mark: (dm_env) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | TF/JAX | -| [keras-rl](https://github.com/keras-rl/keras-rl) | [![GitHub stars](https://img.shields.io/github/stars/keras-rl/keras-rl)](https://github.com/keras-rl/keras-rlstargazers) | 7 | :heavy_check_mark: (gym) | :x: | :x: | :x: | Keras | -| [rlpyt](https://github.com/astooke/rlpyt) | [![GitHub stars](https://img.shields.io/github/stars/astooke/rlpyt)](https://github.com/astooke/rlpyt/stargazers) | 11 | :x: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | PyTorch | -| [ChainerRL](https://github.com/chainer/chainerrl) | [![GitHub stars](https://img.shields.io/github/stars/chainer/chainerrl)](https://github.com/chainer/chainerrl/stargazers) | 18 | :heavy_check_mark: (gym) | :heavy_check_mark: | :heavy_check_mark: | :x: | Chainer | -| [Sample Factory](https://github.com/alex-petrenko/sample-factory) | [![GitHub stars](https://img.shields.io/github/stars/alex-petrenko/sample-factory)](https://github.com/alex-petrenko/sample-factory/stargazers) | 1 (4) | :heavy_check_mark: (gym) | :heavy_check_mark: | :heavy_check_mark: 
| :heavy_check_mark: | PyTorch | -| | | | | | | | | -| [Tianshou](https://github.com/thu-ml/tianshou) | [![GitHub stars](https://img.shields.io/github/stars/thu-ml/tianshou)](https://github.com/thu-ml/tianshou/stargazers) | 20 | :heavy_check_mark: (Gymnasium) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | PyTorch | - -(1): access date: 2021-08-08 - -(2): not all algorithms support this feature - -(3): TQC and QR-DQN in [sb3-contrib](https://github.com/Stable-Baselines-Team/stable-baselines3-contrib) instead of main repo - -(4): super fast APPO! - -### High quality software engineering standard - -| RL Platform | Documentation | Code Coverage | Type Hints | Last Update | -| ------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------ | ----------------------------------------------------------------------------------------------------------------- | -| [Baselines](https://github.com/openai/baselines) | :x: | :x: | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/openai/baselines?label=last%20update) | -| [Stable-Baselines](https://github.com/hill-a/stable-baselines) | [![Documentation Status](https://readthedocs.org/projects/stable-baselines/badge/?version=master)](https://stable-baselines.readthedocs.io/en/master/?badge=master) | [![coverage](https://img.shields.io/badge/coverage-86%25-brightgreen.svg?style=flat)](https://www.codacy.com/app/baselines_janitors/stable-baselines?utm_source=github.com&utm_medium=referral&utm_content=hill-a/stable-baselines&utm_campaign=Badge_Coverage) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/hill-a/stable-baselines?label=last%20update) | -| [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) | [![Documentation Status](https://readthedocs.org/projects/stable-baselines/badge/?version=master)](https://stable-baselines3.readthedocs.io/en/master/?badge=master) | [![coverage report](https://gitlab.com/araffin/stable-baselines3/badges/master/coverage.svg)](https://gitlab.com/araffin/stable-baselines3/-/commits/master) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/DLR-RM/stable-baselines3?label=last%20update) | -| [Ray/RLlib](https://github.com/ray-project/ray/tree/master/rllib/) | [![](https://readthedocs.org/projects/ray/badge/?version=master)](http://docs.ray.io/en/master/rllib.html) | :heavy_minus_sign:(1) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/ray-project/ray?label=last%20update) | -| [SpinningUp](https://github.com/openai/spinningup) | [![](https://img.shields.io/readthedocs/spinningup)](https://spinningup.openai.com/) | :x: | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/openai/spinningup?label=last%20update) | -| [Dopamine](https://github.com/google/dopamine) | [![](https://img.shields.io/badge/docs-passing-green)](https://github.com/google/dopamine/tree/master/docs) | :x: | :x: | ![GitHub last 
commit](https://img.shields.io/github/last-commit/google/dopamine?label=last%20update) | -| [ACME](https://github.com/deepmind/acme) | [![](https://img.shields.io/badge/docs-passing-green)](https://github.com/deepmind/acme/blob/master/docs/index.md) | :heavy_minus_sign:(1) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/deepmind/acme?label=last%20update) | -| [keras-rl](https://github.com/keras-rl/keras-rl) | [![Documentation](https://readthedocs.org/projects/keras-rl/badge/)](http://keras-rl.readthedocs.io/) | :heavy_minus_sign:(1) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/keras-rl/keras-rl?label=last%20update) | -| [rlpyt](https://github.com/astooke/rlpyt) | [![Docs](https://readthedocs.org/projects/rlpyt/badge/?version=latest&style=flat)](https://rlpyt.readthedocs.io/en/latest/) | [![codecov](https://codecov.io/gh/astooke/rlpyt/graph/badge.svg)](https://codecov.io/gh/astooke/rlpyt) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/astooke/rlpyt?label=last%20update) | -| [ChainerRL](https://github.com/chainer/chainerrl) | [![Documentation Status](https://readthedocs.org/projects/chainerrl/badge/?version=latest)](http://chainerrl.readthedocs.io/en/latest/?badge=latest) | [![Coverage Status](https://coveralls.io/repos/github/chainer/chainerrl/badge.svg?branch=master)](https://coveralls.io/github/chainer/chainerrl?branch=master) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/chainer/chainerrl?label=last%20update) | -| [Sample Factory](https://github.com/alex-petrenko/sample-factory) | [:heavy_minus_sign:](https://arxiv.org/abs/2006.11751) | [![codecov](https://codecov.io/gh/alex-petrenko/sample-factory/branch/master/graph/badge.svg)](https://codecov.io/gh/alex-petrenko/sample-factory) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/alex-petrenko/sample-factory?label=last%20update) | -| | | | | | -| [Tianshou](https://github.com/thu-ml/tianshou) | [![Read the Docs](https://img.shields.io/readthedocs/tianshou)](https://tianshou.readthedocs.io/en/master) | [![codecov](https://img.shields.io/codecov/c/gh/thu-ml/tianshou)](https://codecov.io/gh/thu-ml/tianshou) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/thu-ml/tianshou?label=last%20update) | +### High Software Engineering Standards + +| RL Platform | Documentation | Code Coverage | Type Hints | Last Update | +| ------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------ | ----------------------------------------------------------------------------------------------------------------- | +| [Stable-Baselines3](https://github.com/DLR-RM/stable-baselines3) | [![Documentation Status](https://readthedocs.org/projects/stable-baselines/badge/?version=master)](https://stable-baselines3.readthedocs.io/en/master/?badge=master) | [![coverage report](https://gitlab.com/araffin/stable-baselines3/badges/master/coverage.svg)](https://gitlab.com/araffin/stable-baselines3/-/commits/master) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/DLR-RM/stable-baselines3?label=last%20update) | +| 
[Ray/RLlib](https://github.com/ray-project/ray/tree/master/rllib/) | [![](https://readthedocs.org/projects/ray/badge/?version=master)](http://docs.ray.io/en/master/rllib.html) | :heavy_minus_sign:(1) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/ray-project/ray?label=last%20update) | +| [SpinningUp](https://github.com/openai/spinningup) | [![](https://img.shields.io/readthedocs/spinningup)](https://spinningup.openai.com/) | :x: | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/openai/spinningup?label=last%20update) | +| [Dopamine](https://github.com/google/dopamine) | [![](https://img.shields.io/badge/docs-passing-green)](https://github.com/google/dopamine/tree/master/docs) | :x: | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/google/dopamine?label=last%20update) | +| [ACME](https://github.com/deepmind/acme) | [![](https://img.shields.io/badge/docs-passing-green)](https://github.com/deepmind/acme/blob/master/docs/index.md) | :heavy_minus_sign:(1) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/deepmind/acme?label=last%20update) | +| [Sample Factory](https://github.com/alex-petrenko/sample-factory) | [:heavy_minus_sign:](https://arxiv.org/abs/2006.11751) | [![codecov](https://codecov.io/gh/alex-petrenko/sample-factory/branch/master/graph/badge.svg)](https://codecov.io/gh/alex-petrenko/sample-factory) | :x: | ![GitHub last commit](https://img.shields.io/github/last-commit/alex-petrenko/sample-factory?label=last%20update) | +| | | | | | +| [Tianshou](https://github.com/thu-ml/tianshou) | [![Read the Docs](https://img.shields.io/readthedocs/tianshou)](https://tianshou.readthedocs.io/en/master) | [![codecov](https://img.shields.io/codecov/c/gh/thu-ml/tianshou)](https://codecov.io/gh/thu-ml/tianshou) | :heavy_check_mark: | ![GitHub last commit](https://img.shields.io/github/last-commit/thu-ml/tianshou?label=last%20update) | (1): it has continuous integration but the coverage rate is not available -### Reproducible and High Quality Result +### Reproducible, High-Quality Results + +Tianshou is rigorously tested. In contrast to other RL platforms, **our tests include the full agent training procedure for all of the implemented algorithms**. Our tests would fail once if any of the agents failed to achieve a consistent level of performance on limited epochs. +Our tests thus ensure reproducibility. +Check out the [GitHub Actions](https://github.com/thu-ml/tianshou/actions) page for more detail. + +Atari and MuJoCo benchmark results can be found in the [examples/atari/](examples/atari/) and [examples/mujoco/](examples/mujoco/) folders respectively. **Our MuJoCo results reach or exceed the level of performance of most existing benchmarks.** -Tianshou has its tests. Different from other platforms, **the tests include the full agent training procedure for all of the implemented algorithms**. It would be failed once if it could not train an agent to perform well enough on limited epochs on toy scenarios. The tests secure the reproducibility of our platform. Check out the [GitHub Actions](https://github.com/thu-ml/tianshou/actions) page for more detail. +### Algorithm Abstraction -The Atari/Mujoco benchmark results are under [examples/atari/](examples/atari/) and [examples/mujoco/](examples/mujoco/) folders. 
**Our Mujoco result can beat most of existing benchmark.** +Reinforcement learning algorithms are built on abstractions for -### Modularized Policy +- on-policy algorithms (`OnPolicyAlgorithm`), +- off-policy algorithms (`OffPolicyAlgorithm`), and +- offline algorithms (`OfflineAlgorithm`), -We decouple all of the algorithms roughly into the following parts: +all of which clearly separate the core algorithm from the training process and the respective environment interactions. -- `__init__`: initialize the policy; -- `forward`: to compute actions over given observations; -- `process_fn`: to preprocess data from replay buffer (since we have reformulated all algorithms to replay-buffer based algorithms); -- `learn`: to learn from a given batch data; -- `post_process_fn`: to update the replay buffer from the learning process (e.g., prioritized replay buffer needs to update the weight); -- `update`: the main interface for training, i.e., `process_fn -> learn -> post_process_fn`. +In each case, the implementation of an algorithm necessarily involves only the implementation of methods for -Within this API, we can interact with different policies conveniently. +- pre-processing a batch of data, augmenting it with necessary information/sufficient statistics for learning (`_preprocess_batch`), +- updating model parameters based on an augmented batch of data (`_update_with_batch`). + +The implementation of these methods suffices for a new algorithm to be applicable within Tianshou, +making experimentation with new approaches particularly straightforward. ## Quick Start -This is an example of Deep Q Network. You can also run the full script at [test/discrete/test_dqn.py](https://github.com/thu-ml/tianshou/blob/master/test/discrete/test_dqn.py). +Tianshou provides two API levels: + +- the high-level interface, which provides ease of use for end users seeking to run deep reinforcement learning applications +- the procedural interface, which provides a maximum of control, especially for very advanced users and developers of reinforcement learning algorithms. -First, import some relevant packages: +In the following, let us consider an example application using the _CartPole_ gymnasium environment. +We shall apply the deep Q-network (DQN) learning algorithm using both APIs. + +### High-Level API + +In the high-level API, the basis for an RL experiment is an `ExperimentBuilder` +with which we can build the experiment we then seek to run. +Since we want to use DQN, we use the specialization `DQNExperimentBuilder`. + +The high-level API provides largely declarative semantics, i.e. the code is +almost exclusively concerned with configuration that controls what to do +(rather than how to do it).
+ +```python +from tianshou.highlevel.config import OffPolicyTrainingConfig +from tianshou.highlevel.env import ( + EnvFactoryRegistered, + VectorEnvType, +) +from tianshou.highlevel.experiment import DQNExperimentBuilder, ExperimentConfig +from tianshou.highlevel.params.algorithm_params import DQNParams +from tianshou.highlevel.trainer import ( + EpochStopCallbackRewardThreshold, +) + +experiment = ( + DQNExperimentBuilder( + EnvFactoryRegistered( + task="CartPole-v1", + venv_type=VectorEnvType.DUMMY, + train_seed=0, + test_seed=10, + ), + ExperimentConfig( + persistence_enabled=False, + watch=True, + watch_render=1 / 35, + watch_num_episodes=100, + ), + OffPolicyTrainingConfig( + max_epochs=10, + epoch_num_steps=10000, + batch_size=64, + num_train_envs=10, + num_test_envs=100, + buffer_size=20000, + collection_step_num_env_steps=10, + update_step_num_gradient_steps_per_sample=1 / 10, + ), + ) + .with_dqn_params( + DQNParams( + lr=1e-3, + gamma=0.9, + n_step_return_horizon=3, + target_update_freq=320, + eps_training=0.3, + eps_inference=0.0, + ), + ) + .with_model_factory_default(hidden_sizes=(64, 64)) + .with_epoch_stop_callback(EpochStopCallbackRewardThreshold(195)) + .build() +) +experiment.run() +``` + +The experiment builder takes three arguments: + +- the environment factory for the creation of environments. In this case, + we use an existing factory implementation for gymnasium environments. +- the experiment configuration, which controls persistence and the overall + experiment flow. In this case, we have configured that we want to observe + the agent's behavior after it is trained (`watch=True`) for a number of + episodes (`watch_num_episodes=100`). We have disabled persistence, because + we do not want to save training logs, the agent or its configuration for + future use. +- the training configuration, which controls fundamental training parameters, + such as the total number of epochs we run the experiment for (`max_epochs=10`) + and the number of environment steps each epoch shall consist of + (`epoch_num_steps=10000`). + Every epoch consists of a series of data collection (rollout) steps and + training steps. + The parameter `collection_step_num_env_steps` controls the amount of data that is + collected in each collection step; after each collection step, we + perform a training step, applying a gradient-based update based on a sample + of data (`batch_size=64`) taken from the buffer of data that has been + collected. For further details, see the documentation of the configuration class. + +We then proceed to configure some of the parameters of the DQN algorithm itself. +For instance, we control the epsilon parameter for exploration. +We want to use random exploration during rollouts for training (`eps_training`), +but not when evaluating the agent's performance in the test environments +(`eps_inference`). +Furthermore, we configure model parameters of the network for the Q function, +parametrising the hidden layer dimensions of the default MLP factory. + +Find the script in [examples/discrete/discrete_dqn_hl.py](examples/discrete/discrete_dqn_hl.py). +Here's a run (with the training time cut short): +

+ +

+ +Find many further applications of the high-level API in the `examples/` folder; +look for scripts ending with `_hl.py`. +Note that most of these examples require the extra `argparse` +(install it by adding `--extras argparse` when invoking poetry). + +### Procedural API + +Let us now consider an analogous example in the procedural API. +Find the full script in [examples/discrete/discrete_dqn.py](https://github.com/thu-ml/tianshou/blob/master/examples/discrete/discrete_dqn.py). + +First, import the relevant packages: ```python import gymnasium as gym -import torch, numpy as np, torch.nn as nn +import torch from torch.utils.tensorboard import SummaryWriter import tianshou as ts ``` -Define some hyper-parameters: +Define hyper-parameters: ```python -task = 'CartPole-v0' +task = 'CartPole-v1' lr, epoch, batch_size = 1e-3, 10, 64 train_num, test_num = 10, 100 gamma, n_step, target_freq = 0.9, 3, 320 buffer_size = 20000 eps_train, eps_test = 0.1, 0.05 -step_per_epoch, step_per_collect = 10000, 10 -logger = ts.utils.TensorboardLogger(SummaryWriter('log/dqn')) # TensorBoard is supported! -# For other loggers: https://tianshou.readthedocs.io/en/master/tutorials/logger.html +epoch_num_steps, collection_step_num_env_steps = 10000, 10 +``` + +Initialize the logger: + +```python +logger = ts.utils.TensorboardLogger(SummaryWriter('log/dqn')) ``` Make environments: ```python -# you can also try with SubprocVectorEnv +# You can also try SubprocVectorEnv, which will use parallelization train_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(train_num)]) test_envs = ts.env.DummyVectorEnv([lambda: gym.make(task) for _ in range(test_num)]) ``` -Define the network: +Create the network, policy, and algorithm: ```python from tianshou.utils.net.common import Net -# you can define other net by following the API: -# https://tianshou.readthedocs.io/en/master/tutorials/dqn.html#build-the-network -env = gym.make(task) +from tianshou.algorithm import DQN +from tianshou.algorithm.modelfree.dqn import DiscreteQLearningPolicy +from tianshou.algorithm.optim import AdamOptimizerFactory + +# Note: You can easily define other networks. 
+# See https://tianshou.readthedocs.io/en/master/01_tutorials/00_dqn.html#build-the-network +env = gym.make(task, render_mode="human") state_shape = env.observation_space.shape or env.observation_space.n action_shape = env.action_space.shape or env.action_space.n -net = Net(state_shape=state_shape, action_shape=action_shape, hidden_sizes=[128, 128, 128]) -optim = torch.optim.Adam(net.parameters(), lr=lr) +net = Net( + state_shape=state_shape, action_shape=action_shape, + hidden_sizes=[128, 128, 128] +) + +policy = DiscreteQLearningPolicy( + model=net, + action_space=env.action_space, + eps_training=eps_train, + eps_inference=eps_test +) + +# Create the algorithm with the policy and optimizer factory +algorithm = DQN( + policy=policy, + optim=AdamOptimizerFactory(lr=lr), + gamma=gamma, + n_step_return_horizon=n_step, + target_update_freq=target_freq +) ``` -Setup policy and collectors: +Set up the collectors: ```python -policy = ts.policy.DQNPolicy(net, optim, gamma, n_step, target_update_freq=target_freq) -train_collector = ts.data.Collector(policy, train_envs, ts.data.VectorReplayBuffer(buffer_size, train_num), exploration_noise=True) -test_collector = ts.data.Collector(policy, test_envs, exploration_noise=True) # because DQN uses epsilon-greedy method +train_collector = ts.data.Collector(policy, train_envs, + ts.data.VectorReplayBuffer(buffer_size, train_num), exploration_noise=True) +test_collector = ts.data.Collector(policy, test_envs, + exploration_noise=True) # because DQN uses epsilon-greedy method ``` -Let's train it: +Let's train it using the algorithm: ```python -result = ts.trainer.offpolicy_trainer( - policy, train_collector, test_collector, epoch, step_per_epoch, step_per_collect, - test_num, batch_size, update_per_step=1 / step_per_collect, +from tianshou.highlevel.config import OffPolicyTrainingConfig + +# Create training configuration +training_config = OffPolicyTrainingConfig( + max_epochs=epoch, + epoch_num_steps=epoch_num_steps, + batch_size=batch_size, + num_train_envs=train_num, + num_test_envs=test_num, + buffer_size=buffer_size, + collection_step_num_env_steps=collection_step_num_env_steps, + update_step_num_gradient_steps_per_sample=1 / collection_step_num_env_steps, + test_step_num_episodes=test_num, +) + +# Run training (trainer is created automatically by the algorithm) +result = algorithm.run_training( + training_config=training_config, + train_collector=train_collector, + test_collector=test_collector, + logger=logger, train_fn=lambda epoch, env_step: policy.set_eps(eps_train), test_fn=lambda epoch, env_step: policy.set_eps(eps_test), stop_fn=lambda mean_rewards: mean_rewards >= env.spec.reward_threshold, - logger=logger) -print(f'Finished training! 
Use {result["duration"]}') +) +print(f"Finished training in {result.timing.total_time} seconds") ``` -Save / load the trained policy (it's exactly the same as PyTorch `nn.module`): +Save/load the trained policy (it's exactly the same as loading a `torch.nn.module`): ```python torch.save(policy.state_dict(), 'dqn.pth') policy.load_state_dict(torch.load('dqn.pth')) ``` -Watch the performance with 35 FPS: +Watch the agent with 35 FPS: ```python policy.eval() @@ -260,27 +459,19 @@ collector = ts.data.Collector(policy, env, exploration_noise=True) collector.collect(n_episode=1, render=1 / 35) ``` -Look at the result saved in tensorboard: (with bash script in your terminal) +Inspect the data saved in TensorBoard: ```bash $ tensorboard --logdir log/dqn ``` -You can check out the [documentation](https://tianshou.readthedocs.io) for advanced usage. - -It's worth a try: here is a test on a laptop (i7-8750H + GTX1060). It only uses **3** seconds for training an agent based on vanilla policy gradient on the CartPole-v0 task: (seed may be different across different platform and device) - -```bash -$ python3 test/discrete/test_pg.py --seed 0 --render 0.03 -``` - -
- -
+Please read the [documentation](https://tianshou.readthedocs.io) for advanced usage. ## Contributing -Tianshou is still under development. More algorithms and features are going to be added and we always welcome contributions to help make Tianshou better. If you would like to contribute, please check out [this link](https://tianshou.readthedocs.io/en/master/contributing.html). +Tianshou is still under development. +Further algorithms and features are continuously being added, and we always welcome contributions to help make Tianshou better. +If you would like to contribute, please check out [this link](https://tianshou.org/en/master/04_contributing/04_contributing.html). ## Citing Tianshou @@ -299,7 +490,10 @@ If you find Tianshou useful, please cite it in your publications. } ``` -## Acknowledgment +## Acknowledgments + +Tianshou is supported by [appliedAI Institute for Europe](https://www.appliedai-institute.de/en/), +who is committed to providing long-term support and development. Tianshou was previously a reinforcement learning platform based on TensorFlow. You can check out the branch [`priv`](https://github.com/thu-ml/tianshou/tree/priv) for more detail. Many thanks to [Haosheng Zou](https://github.com/HaoshengZou)'s pioneering work for Tianshou before version 0.1.1. diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 000000000..10bcdfd1e --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,4 @@ +/03_api/* +jupyter_execute +_toc.yml +.jupyter_cache diff --git a/docs/tutorials/dqn.rst b/docs/01_tutorials/00_dqn.rst similarity index 75% rename from docs/tutorials/dqn.rst rename to docs/01_tutorials/00_dqn.rst index b2c5844e2..3c28e7163 100644 --- a/docs/tutorials/dqn.rst +++ b/docs/01_tutorials/00_dqn.rst @@ -41,11 +41,11 @@ First of all, you have to make an environment for your agent to interact with. Y import gymnasium as gym import tianshou as ts - env = gym.make('CartPole-v0') + env = gym.make('CartPole-v1') -CartPole-v0 includes a cart carrying a pole moving on a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and apply eligible algorithms. DDPG :cite:`DDPG`, for example, could only be applied to continuous action spaces, while almost all other policy gradient methods could be applied to both. +CartPole-v1 includes a cart carrying a pole moving on a track. This is a simple environment with a discrete action space, for which DQN applies. You have to identify whether the action space is continuous or discrete and apply eligible algorithms. DDPG :cite:`DDPG`, for example, could only be applied to continuous action spaces, while almost all other policy gradient methods could be applied to both. -Here is the detail of useful fields of CartPole-v0: +Here is the detail of useful fields of CartPole-v1: - ``state``: the position of the cart, the velocity of the cart, the angle of the pole and the velocity of the tip of the pole; - ``action``: can only be one of ``[0, 1, 2]``, for moving the cart left, no move, and right; @@ -62,8 +62,8 @@ Setup Vectorized Environment If you want to use the original ``gym.Env``: :: - train_envs = gym.make('CartPole-v0') - test_envs = gym.make('CartPole-v0') + train_envs = gym.make('CartPole-v1') + test_envs = gym.make('CartPole-v1') Tianshou supports vectorized environment for all algorithms. It provides four types of vectorized environment wrapper: @@ -74,8 +74,8 @@ Tianshou supports vectorized environment for all algorithms. 
It provides four ty :: - train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(10)]) - test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v0') for _ in range(100)]) + train_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(10)]) + test_envs = ts.env.DummyVectorEnv([lambda: gym.make('CartPole-v1') for _ in range(100)]) Here, we set up 10 environments in ``train_envs`` and 100 environments in ``test_envs``. @@ -84,8 +84,8 @@ You can also try the super-fast vectorized environment `EnvPool = env.spec.reward_threshold) - print(f'Finished training! Use {result["duration"]}') - -The meaning of each parameter is as follows (full description can be found at :func:`~tianshou.trainer.offpolicy_trainer`): + from tianshou.trainer import OffPolicyTrainerParams + + def train_fn(epoch, env_step): + policy.set_eps_training(0.1) + + def stop_fn(mean_rewards): + return mean_rewards >= env.spec.reward_threshold + + result = algorithm.run_training( + OffPolicyTrainerParams( + train_collector=train_collector, + test_collector=test_collector, + max_epochs=10, + epoch_num_steps=10000, + collection_step_num_env_steps=10, + test_step_num_episodes=100, + batch_size=64, + update_step_num_gradient_steps_per_sample=0.1, + train_fn=train_fn, + stop_fn=stop_fn, + ) + ) + print(f'Finished training! Use {result.duration}') + +The meaning of each parameter is as follows (full description can be found at :class:`~tianshou.trainer.OffpolicyTrainer`): * ``max_epoch``: The maximum of epochs for training. The training process might be finished before reaching the ``max_epoch``; -* ``step_per_epoch``: The number of environment step (a.k.a. transition) collected per epoch; -* ``step_per_collect``: The number of transition the collector would collect before the network update. For example, the code above means "collect 10 transitions and do one policy network update"; +* ``epoch_num_steps``: The number of environment step (a.k.a. transition) collected per epoch; +* ``collection_step_num_env_steps``: The number of transition the collector would collect before the network update. For example, the code above means "collect 10 transitions and do one policy network update"; * ``episode_per_test``: The number of episodes for one policy evaluation. * ``batch_size``: The batch size of sample data, which is going to feed in the policy network. * ``train_fn``: A function receives the current number of epoch and step index, and performs some operations at the beginning of training in this epoch. For example, the code above means "reset the epsilon to 0.1 in DQN before training". @@ -219,18 +261,10 @@ The returned result is a dictionary as follows: :: { - 'train_step': 9246, - 'train_episode': 504.0, - 'train_time/collector': '0.65s', - 'train_time/model': '1.97s', - 'train_speed': '3518.79 step/s', - 'test_step': 49112, - 'test_episode': 400.0, - 'test_time': '1.38s', - 'test_speed': '35600.52 step/s', - 'best_reward': 199.03, - 'duration': '4.01s' - } + TrainingResult object with attributes like: + best_reward: 199.03 + duration: 4.01s + And other training statistics It shows that within approximately 4 seconds, we finished training a DQN agent on CartPole. The mean returns over 100 consecutive episodes is 199.03. 
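+To make the update frequency concrete, here is how the number of gradient steps per
+collection step works out for the configuration above (a rough illustration based on
+the parameter descriptions given earlier; the trainer parameter documentation is
+authoritative):
+
+::
+
+    collection_step_num_env_steps = 10  # transitions collected per collection step
+    update_step_num_gradient_steps_per_sample = 0.1
+
+    # one gradient update per collection step for this configuration
+    gradient_steps = collection_step_num_env_steps * update_step_num_gradient_steps_per_sample
+    assert gradient_steps == 1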
@@ -252,8 +286,8 @@ Watch the Agent's Performance :: policy.eval() - policy.set_eps(0.05) - collector = ts.data.Collector(policy, env, exploration_noise=True) + policy.set_eps_inference(0.05) + collector = ts.data.Collector(algorithm, env, exploration_noise=True) collector.collect(n_episode=1, render=1 / 35) If you'd like to manually see the action generated by a well-trained agent: @@ -276,26 +310,26 @@ Tianshou supports user-defined training code. Here is the code snippet: # pre-collect at least 5000 transitions with random action before training train_collector.collect(n_step=5000, random=True) - policy.set_eps(0.1) + policy.set_eps_training(0.1) for i in range(int(1e6)): # total step collect_result = train_collector.collect(n_step=10) # once if the collected episodes' mean returns reach the threshold, # or every 1000 steps, we test it on test_collector if collect_result['rews'].mean() >= env.spec.reward_threshold or i % 1000 == 0: - policy.set_eps(0.05) + policy.set_eps_inference(0.05) result = test_collector.collect(n_episode=100) if result['rews'].mean() >= env.spec.reward_threshold: print(f'Finished training! Test mean returns: {result["rews"].mean()}') break else: # back to training eps - policy.set_eps(0.1) + policy.set_eps_training(0.1) # train policy with a sampled batch data from buffer - losses = policy.update(64, train_collector.buffer) + losses = algorithm.update(64, train_collector.buffer) -For further usage, you can refer to the :doc:`/tutorials/cheatsheet`. +For further usage, you can refer to the :doc:`/01_tutorials/07_cheatsheet`. .. rubric:: References diff --git a/docs/tutorials/concepts.rst b/docs/01_tutorials/01_concepts.rst similarity index 61% rename from docs/tutorials/concepts.rst rename to docs/01_tutorials/01_concepts.rst index 79422bb6d..28b0dc276 100644 --- a/docs/tutorials/concepts.rst +++ b/docs/01_tutorials/01_concepts.rst @@ -1,18 +1,22 @@ Basic concepts in Tianshou ========================== -Tianshou splits a Reinforcement Learning agent training procedure into these parts: trainer, collector, policy, and data buffer. The general control flow can be described as: +Tianshou splits a Reinforcement Learning agent training procedure into these parts: algorithm, trainer, collector, policy, a data buffer and batches from the buffer. +The algorithm encapsulates the specific RL learning method (e.g., DQN, PPO), which contains a policy and defines how to update it. -.. image:: /_static/images/concepts_arch.png - :align: center - :height: 300 +.. + The general control flow can be described as: + .. image:: /_static/images/concepts_arch.png + :align: center + :height: 300 -Here is a more detailed description, where ``Env`` is the environment and ``Model`` is the neural network: -.. image:: /_static/images/concepts_arch2.png - :align: center - :height: 300 + Here is a more detailed description, where ``Env`` is the environment and ``Model`` is the neural network: + + .. image:: /_static/images/concepts_arch2.png + :align: center + :height: 300 Batch @@ -68,7 +72,7 @@ The current implementation of Tianshou typically use the following reserved keys * ``info`` the info of step :math:`t` (in ``gym.Env``, the ``env.step()`` function returns 4 arguments, and the last one is ``info``); * ``policy`` the data computed by policy in step :math:`t`; -When adding data to a replay buffer, the done flag will be inferred automatically from ``terminated``and ``truncated``. 
+When adding data to a replay buffer, the done flag will be inferred automatically from ``terminated`` or ``truncated``. The following code snippet illustrates the usage, including: @@ -139,7 +143,7 @@ The following code snippet illustrates the usage, including: >>> len(buf) 3 -:class:`~tianshou.data.ReplayBuffer` also supports frame_stack sampling (typically for RNN usage, see issue#19), ignoring storing the next observation (save memory in Atari tasks), and multi-modal observation (see issue#38): +:class:`~tianshou.data.ReplayBuffer` also supports "frame stack" sampling (typically for RNN usage, see `https://github.com/thu-ml/tianshou/issues/19`), ignoring storing the next observation (save memory in Atari tasks), and multi-modal observation (see `https://github.com/thu-ml/tianshou/issues/38`): .. raw:: html @@ -220,19 +224,28 @@ The following code snippet illustrates the usage, including: Tianshou provides other type of data buffer such as :class:`~tianshou.data.PrioritizedReplayBuffer` (based on Segment Tree and ``numpy.ndarray``) and :class:`~tianshou.data.VectorReplayBuffer` (add different episodes' data but without losing chronological order). Check out :class:`~tianshou.data.ReplayBuffer` for more detail. -Policy ------- +Algorithm and Policy +-------------------- + +Tianshou's RL framework is built around two key abstractions: :class:`~tianshou.algorithm.Algorithm` and :class:`~tianshou.algorithm.Policy`. + +**Algorithm**: The core abstraction that encapsulates a complete RL learning method (e.g., DQN, PPO, SAC). Each algorithm contains a policy and defines how to update it using training data. All algorithm classes inherit from :class:`~tianshou.algorithm.Algorithm`. + +An algorithm class typically has the following parts: + +* :meth:`~tianshou.algorithm.Algorithm.__init__`: initialize the algorithm with a policy and optimization configuration; +* :meth:`~tianshou.algorithm.Algorithm._preprocess_batch`: pre-process data from the replay buffer (e.g., compute n-step returns); +* :meth:`~tianshou.algorithm.Algorithm._update_with_batch`: the algorithm-specific network update logic; +* :meth:`~tianshou.algorithm.Algorithm._postprocess_batch`: post-process the batch data (e.g., update prioritized replay buffer weights); +* :meth:`~tianshou.algorithm.Algorithm.create_trainer`: create the appropriate trainer for this algorithm; -Tianshou aims to modularize RL algorithms. It comes into several classes of policies in Tianshou. All of the policy classes must inherit :class:`~tianshou.policy.BasePolicy`. +**Policy**: Represents the mapping from observations to actions. Policy classes inherit from :class:`~tianshou.algorithm.Policy`. -A policy class typically has the following parts: +A policy class typically provides: -* :meth:`~tianshou.policy.BasePolicy.__init__`: initialize the policy, including copying the target network and so on; -* :meth:`~tianshou.policy.BasePolicy.forward`: compute action with given observation; -* :meth:`~tianshou.policy.BasePolicy.process_fn`: pre-process data from the replay buffer; -* :meth:`~tianshou.policy.BasePolicy.learn`: update policy with a given batch of data. -* :meth:`~tianshou.policy.BasePolicy.post_process_fn`: update the buffer with a given batch of data. -* :meth:`~tianshou.policy.BasePolicy.update`: the main interface for training. 
This function samples data from buffer, pre-process data (such as computing n-step return), learn with the data, and finally post-process the data (such as updating prioritized replay buffer); in short, ``process_fn -> learn -> post_process_fn``. +* :meth:`~tianshou.algorithm.Policy.forward`: compute action distribution or Q-values given observations; +* :meth:`~tianshou.algorithm.Policy.compute_action`: get concrete actions from observations for environment interaction; +* :meth:`~tianshou.algorithm.Policy.map_action`: transform raw network outputs to environment action space; .. _policy_state: @@ -245,22 +258,10 @@ During the training process, the policy has two main states: training state and The meaning of training and testing state is obvious: the agent interacts with environment, collects training data and performs update, that's training state; the testing state is to evaluate the performance of the current policy during training process. As for the collecting state, it is defined as interacting with environments and collecting training data into the buffer; -we define the updating state as performing a model update by :meth:`~tianshou.policy.BasePolicy.update` during training process. +we define the updating state as performing a model update by the algorithm's update methods during training process. - -In order to distinguish these states, you can check the policy state by ``policy.training`` and ``policy.updating``. The state setting is as follows: - -+-----------------------------------+-----------------+-----------------+ -| State for policy | policy.training | policy.updating | -+================+==================+=================+=================+ -| | Collecting state | True | False | -| Training state +------------------+-----------------+-----------------+ -| | Updating state | True | True | -+----------------+------------------+-----------------+-----------------+ -| Testing state | False | False | -+-----------------------------------+-----------------+-----------------+ - -``policy.updating`` is helpful to distinguish the different exploration state, for example, in DQN we don't have to use epsilon-greedy in a pure network update, so ``policy.updating`` is helpful for setting epsilon in this case. +The collection of data from the env may differ in training and in inference (for example, in training one may add exploration noise, or sample from the predicted action distribution instead of taking its mode). The switch between the different collection strategies in training and inference is controlled by ``policy.is_within_training_step``, see also the docstring of it +for more details. policy.forward @@ -270,7 +271,7 @@ The ``forward`` function computes the action over given observations. The input The input batch is the environment data (e.g., observation, reward, done flag and info). It comes from either :meth:`~tianshou.data.Collector.collect` or :meth:`~tianshou.data.ReplayBuffer.sample`. The first dimension of all variables in the input ``batch`` should be equal to the batch-size. -The output is also a Batch which must contain "act" (action) and may contain "state" (hidden state of policy), "policy" (the intermediate result of policy which needs to save into the buffer, see :meth:`~tianshou.policy.BasePolicy.forward`), and some other algorithm-specific keys. 
+The output is also a ``Batch`` which must contain "act" (action) and may contain "state" (hidden state of policy), "policy" (the intermediate result of policy which needs to save into the buffer, see :meth:`~tianshou.algorithm.BasePolicy.forward`), and some other algorithm-specific keys. For example, if you try to use your policy to evaluate one episode (and don't want to use :meth:`~tianshou.data.Collector.collect`), use the following code-snippet: :: @@ -282,15 +283,17 @@ For example, if you try to use your policy to evaluate one episode (and don't wa act = policy(batch).act[0] # policy.forward return a batch, use ".act" to extract the action obs, rew, done, info = env.step(act) +For inference, it is recommended to use the shortcut method :meth:`~tianshou.algorithm.Policy.compute_action` to compute the action directly from the observation. + Here, ``Batch(obs=[obs])`` will automatically create the 0-dimension to be the batch-size. Otherwise, the network cannot determine the batch-size. .. _process_fn: -policy.process_fn -^^^^^^^^^^^^^^^^^ +Algorithm Preprocessing and N-step Returns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The ``process_fn`` function computes some variables that depends on time-series. For example, compute the N-step or GAE returns. +The algorithm handles data preprocessing, including computing variables that depend on time-series such as N-step or GAE returns. This functionality is implemented in :meth:`~tianshou.algorithm.Algorithm._preprocess_batch` and the static methods :meth:`~tianshou.algorithm.Algorithm.compute_nstep_return` and :meth:`~tianshou.algorithm.Algorithm.compute_episodic_return`. Take 2-step return DQN as an example. The 2-step return DQN compute each transition's return as: @@ -304,42 +307,19 @@ where :math:`\gamma` is the discount factor, :math:`\gamma \in [0, 1]`. Here is # pseudocode, cannot work obs = env.reset() buffer = Buffer(size=10000) - agent = DQN() + algorithm = DQN(...) for i in range(int(1e6)): - act = agent.compute_action(obs) + act = algorithm.policy.compute_action(obs) obs_next, rew, done, _ = env.step(act) buffer.store(obs, act, obs_next, rew, done) obs = obs_next if i % 1000 == 0: - b_s, b_a, b_s_, b_r, b_d = buffer.get(size=64) - # compute 2-step returns. How? - b_ret = compute_2_step_return(buffer, b_r, b_d, ...) - # update DQN policy - agent.update(b_s, b_a, b_s_, b_r, b_d, b_ret) - -Thus, we need a time-related interface for calculating the 2-step return. :meth:`~tianshou.policy.BasePolicy.process_fn` finishes this work by providing the replay buffer, the sample index, and the sample batch data. Since we store all the data in the order of time, you can simply compute the 2-step return as: -:: - - class DQN_2step(BasePolicy): - """some code""" + # algorithm handles sampling, preprocessing, and updating + algorithm.update(sample_size=64, buffer=buffer) - def process_fn(self, batch, buffer, indices): - buffer_len = len(buffer) - batch_2 = buffer[(indices + 2) % buffer_len] - # this will return a batch data where batch_2.obs is s_t+2 - # we can also get s_t+2 through: - # batch_2_obs = buffer.obs[(indices + 2) % buffer_len] - # in short, buffer.obs[i] is equal to buffer[i].obs, but the former is more effecient. 
- Q = self(batch_2, eps=0) # shape: [batchsize, action_shape] - maxQ = Q.max(dim=-1) - batch.returns = batch.rew \ - + self._gamma * buffer.rew[(indices + 1) % buffer_len] \ - + self._gamma ** 2 * maxQ - return batch +The algorithm's :meth:`~tianshou.algorithm.Algorithm._preprocess_batch` method automatically handles n-step return computation by calling :meth:`~tianshou.algorithm.Algorithm.compute_nstep_return`, which provides the replay buffer, sample indices, and batch data. Since we store all the data in the order of time, the n-step return can be computed efficiently using the buffer's temporal structure. -This code does not consider the done flag, so it may not work very well. It shows two ways to get :math:`s_{t + 2}` from the replay buffer easily in :meth:`~tianshou.policy.BasePolicy.process_fn`. - -For other method, you can check out :doc:`/api/tianshou.policy`. We give the usage of policy class a high-level explanation in :ref:`pseudocode`. +For custom preprocessing logic, you can override :meth:`~tianshou.algorithm.Algorithm._preprocess_batch` in your algorithm subclass. The method receives the sampled batch, buffer, and indices, allowing you to add computed values like returns, advantages, or other algorithm-specific preprocessing steps. Collector @@ -347,13 +327,13 @@ Collector The :class:`~tianshou.data.Collector` enables the policy to interact with different types of environments conveniently. -:meth:`~tianshou.data.Collector.collect` is the main method of Collector: it let the policy perform a specified number of step ``n_step`` or episode ``n_episode`` and store the data in the replay buffer, then return the statistics of the collected data such as episode's total reward. +:meth:`~tianshou.data.Collector.collect` is the main method of :class:`~tianshou.data.Collector`: it lets the policy perform a specified number of steps (``n_step``) or episodes (``n_episode``) and store the data in the replay buffer, then return the statistics of the collected data such as episode's total reward. The general explanation is listed in :ref:`pseudocode`. Other usages of collector are listed in :class:`~tianshou.data.Collector` documentation. Here are some example usages: :: - policy = PGPolicy(...) # or other policies if you wish - env = gym.make("CartPole-v0") + policy = DiscreteQLearningPolicy(...) # or other policies if you wish + env = gym.make("CartPole-v1") replay_buffer = ReplayBuffer(size=10000) @@ -363,7 +343,7 @@ The general explanation is listed in :ref:`pseudocode`. Other usages of collecto # the collector supports vectorized environments as well vec_buffer = VectorReplayBuffer(total_size=10000, buffer_num=3) # buffer_num should be equal to (suggested) or larger than #envs - envs = DummyVectorEnv([lambda: gym.make("CartPole-v0") for _ in range(3)]) + envs = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(3)]) collector = Collector(policy, envs, buffer=vec_buffer) # collect 3 episodes @@ -380,29 +360,33 @@ There is also another type of collector :class:`~tianshou.data.AsyncCollector` w Trainer ------- -Once you have a collector and a policy, you can start writing the training method for your RL agent. Trainer, to be honest, is a simple wrapper. It helps you save energy for writing the training loop. You can also construct your own trainer: :ref:`customized_trainer`. +Once you have an algorithm and a collector, you can start the training process. The trainer orchestrates the training loop and calls upon the algorithm's specific network updating logic. 
Each algorithm creates its appropriate trainer type through the :meth:`~tianshou.algorithm.Algorithm.create_trainer` method. -Tianshou has three types of trainer: :func:`~tianshou.trainer.onpolicy_trainer` for on-policy algorithms such as Policy Gradient, :func:`~tianshou.trainer.offpolicy_trainer` for off-policy algorithms such as DQN, and :func:`~tianshou.trainer.offline_trainer` for offline algorithms such as BCQ. Please check out :doc:`/api/tianshou.trainer` for the usage. +Tianshou has three main trainer classes: :class:`~tianshou.trainer.OnPolicyTrainer` for on-policy algorithms such as Policy Gradient, :class:`~tianshou.trainer.OffPolicyTrainer` for off-policy algorithms such as DQN, and :class:`~tianshou.trainer.OfflineTrainer` for offline algorithms such as BCQ. -We also provide the corresponding iterator-based trainer classes :class:`~tianshou.trainer.OnpolicyTrainer`, :class:`~tianshou.trainer.OffpolicyTrainer`, :class:`~tianshou.trainer.OfflineTrainer` to facilitate users writing more flexible training logic: +The typical workflow is: :: - trainer = OnpolicyTrainer(...) - for epoch, epoch_stat, info in trainer: - print(f"Epoch: {epoch}") - print(epoch_stat) - print(info) - do_something_with_policy() - query_something_about_policy() - make_a_plot_with(epoch_stat) - display(info) + # Create algorithm with policy + algorithm = DQN(policy=policy, optim=optimizer_factory, ...) + + # Create trainer parameters + params = OffPolicyTrainerParams( + max_epochs=100, + step_per_epoch=1000, + train_collector=train_collector, + test_collector=test_collector, + ... + ) + + # Run training (trainer is created automatically) + result = algorithm.run_training(params) - # or even iterate on several trainers at the same time +You can also create trainers manually for more control: +:: - trainer1 = OnpolicyTrainer(...) - trainer2 = OnpolicyTrainer(...) - for result1, result2, ... in zip(trainer1, trainer2, ...): - compare_results(result1, result2, ...) + trainer = algorithm.create_trainer(params) + result = trainer.run() .. _pseudocode: @@ -416,22 +400,31 @@ We give a high-level explanation through the pseudocode used in section :ref:`pr # pseudocode, cannot work # methods in tianshou obs = env.reset() buffer = Buffer(size=10000) # buffer = tianshou.data.ReplayBuffer(size=10000) - agent = DQN() # policy.__init__(...) + algorithm = DQN(policy=policy, ...) # algorithm.__init__(...) for i in range(int(1e6)): # done in trainer - act = agent.compute_action(obs) # act = policy(batch, ...).act + act = algorithm.policy.compute_action(obs) # act = policy.compute_action(obs) obs_next, rew, done, _ = env.step(act) # collector.collect(...) buffer.store(obs, act, obs_next, rew, done) # collector.collect(...) obs = obs_next # collector.collect(...) if i % 1000 == 0: # done in trainer - # the following is done in policy.update(batch_size, buffer) + # the following is done in algorithm.update(batch_size, buffer) b_s, b_a, b_s_, b_r, b_d = buffer.get(size=64) # batch, indices = buffer.sample(batch_size) # compute 2-step returns. How? - b_ret = compute_2_step_return(buffer, b_r, b_d, ...) # policy.process_fn(batch, buffer, indices) + b_ret = compute_2_step_return(buffer, b_r, b_d, ...) # algorithm._preprocess_batch(batch, buffer, indices) # update DQN policy - agent.update(b_s, b_a, b_s_, b_r, b_d, b_ret) # policy.learn(batch, ...) + algorithm.update(b_s, b_a, b_s_, b_r, b_d, b_ret) # algorithm._update_with_batch(batch) Conclusion ---------- -So far, we go through the overall framework of Tianshou. 
Really simple, isn't it? +So far, we've covered the overall framework of Tianshou with its new architecture centered around the Algorithm abstraction. The key components are: + +- **Algorithm**: Encapsulates the complete RL learning method, containing a policy and defining how to update it +- **Policy**: Handles the mapping from observations to actions +- **Collector**: Manages environment interaction and data collection +- **Trainer**: Orchestrates the training loop and calls the algorithm's update logic +- **Buffer**: Stores and manages experience data +- **Batch**: A flexible data structure for passing data between components. Batches are collected to the buffer by the Collector and are sampled from the buffer by the `Algorithm` where they are used for learning. + +This modular design cleanly separates concerns while maintaining the flexibility to implement various RL algorithms. diff --git a/docs/tutorials/batch.rst b/docs/01_tutorials/03_batch.rst similarity index 95% rename from docs/tutorials/batch.rst rename to docs/01_tutorials/03_batch.rst index 71f82f84e..f778f0b71 100644 --- a/docs/tutorials/batch.rst +++ b/docs/01_tutorials/03_batch.rst @@ -324,35 +324,35 @@ Still, we can use a tree (in the right) to show the structure of ``Batch`` objec Reserved keys mean that in the future there will eventually be values attached to them. The values can be scalars, tensors, or even **Batch** objects. Understanding this is critical to understand the behavior of ``Batch`` when dealing with heterogeneous Batches. -The introduction of reserved keys gives rise to the need to check if a key is reserved. Tianshou provides ``Batch.is_empty`` to achieve this. +The introduction of reserved keys gives rise to the need to check if a key is reserved. .. raw:: html
- Examples of Batch.is_empty + Examples of checking whether Batch is empty .. code-block:: python - >>> Batch().is_empty() + >>> len(Batch().get_keys()) == 0 True - >>> Batch(a=Batch(), b=Batch(c=Batch())).is_empty() + >>> len(Batch(a=Batch(), b=Batch(c=Batch())).get_keys()) == 0 False - >>> Batch(a=Batch(), b=Batch(c=Batch())).is_empty(recurse=True) + >>> len(Batch(a=Batch(), b=Batch(c=Batch()))) == 0 True - >>> Batch(d=1).is_empty() + >>> len(Batch(d=1).get_keys()) == 0 False - >>> Batch(a=np.float64(1.0)).is_empty() + >>> len(Batch(a=np.float64(1.0)).get_keys()) == 0 False .. raw:: html

-The ``Batch.is_empty`` function has an option to decide whether to identify direct emptiness (just a ``Batch()``) or to identify recursive emptiness (a ``Batch`` object without any scalar/tensor leaf nodes). +To check whether a Batch is empty, use ``len(Batch.get_keys()) == 0`` to test for direct emptiness (just a ``Batch()``) or ``len(Batch) == 0`` to test for recursive emptiness (a ``Batch`` object without any scalar/tensor leaf nodes). .. note:: - Do not get confused with ``Batch.is_empty`` and ``Batch.empty``. ``Batch.empty`` and its in-place variant ``Batch.empty_`` are used to set some values to zeros or None. Check the API documentation for further details. + Do not confuse this with ``Batch.empty``. ``Batch.empty`` and its in-place variant ``Batch.empty_`` are used to set some values to zeros or None. Check the API documentation for further details. Length and Shape @@ -475,18 +475,18 @@ Miscellaneous Notes .. raw:: html
- Batch.to_torch and Batch.to_numpy + Batch.to_torch_ and Batch.to_numpy_ :: >>> data = Batch(a=np.zeros((3, 4))) - >>> data.to_torch(dtype=torch.float32, device='cpu') + >>> data.to_torch_(dtype=torch.float32, device='cpu') >>> print(data.a) tensor([[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]]) - >>> # data.to_numpy is also available - >>> data.to_numpy() + >>> # data.to_numpy_ is also available + >>> data.to_numpy_() .. raw:: html diff --git a/docs/tutorials/tictactoe.rst b/docs/01_tutorials/04_tictactoe.rst similarity index 85% rename from docs/tutorials/tictactoe.rst rename to docs/01_tutorials/04_tictactoe.rst index 962c91cf7..c5d6a87c0 100644 --- a/docs/tutorials/tictactoe.rst +++ b/docs/01_tutorials/04_tictactoe.rst @@ -1,13 +1,15 @@ -Multi-Agent RL -============== +RL against random policy opponent with PettingZoo +================================================= -Tianshou use `PettingZoo` environment for multi-agent RL training. Here are some helpful tutorial links: +Tianshou is compatible with `PettingZoo` environments for multi-agent RL, although does not directly provide facilities for multi-agent RL. Here are some helpful tutorial links: * https://pettingzoo.farama.org/tutorials/tianshou/beginner/ * https://pettingzoo.farama.org/tutorials/tianshou/intermediate/ * https://pettingzoo.farama.org/tutorials/tianshou/advanced/ -In this section, we describe how to use Tianshou to implement multi-agent reinforcement learning. Specifically, we will design an algorithm to learn how to play `Tic Tac Toe `_ (see the image below) against a random opponent. +In this section, we describe how to use Tianshou to implement RL in a multi-agent setting where, however, only one agent is trained, and the other one adopts a fixed random policy. +The user can then use this as a blueprint to replace the random policy with another trainable agent. +Specifically, we will design an algorithm to learn how to play `Tic Tac Toe `_ (see the image below) against a random opponent. .. image:: ../_static/images/tic-tac-toe.png :align: center @@ -16,7 +18,7 @@ In this section, we describe how to use Tianshou to implement multi-agent reinfo Tic-Tac-Toe Environment ----------------------- -The scripts are located at ``test/pettingzoo/``. We have implemented :class:`~tianshou.env.PettingZooEnv` which can wrap any `PettingZoo `_ environment. PettingZoo offers a 3x3 Tic-Tac-Toe environment, let's first explore it. +The scripts are located at ``test/pettingzoo/``. We have implemented :class:`~tianshou.env.PettingZooEnv` which can wrap any `PettingZoo `_ environment. PettingZoo offers a 3x3 Tic-Tac-Toe environment, let's first explore it. :: >>> from tianshou.env import PettingZooEnv # wrapper for PettingZoo environments @@ -120,18 +122,20 @@ Two Random Agents .. Figure:: ../_static/images/marl.png -Tianshou already provides some builtin classes for multi-agent learning. You can check out the API documentation for details. Here we use :class:`~tianshou.policy.RandomPolicy` and :class:`~tianshou.policy.MultiAgentPolicyManager`. The figure on the right gives an intuitive explanation. +Tianshou already provides some builtin classes for multi-agent learning. You can check out the API documentation for details. Here we use :class:`~tianshou.algorithm.MARLRandomPolicy` and :class:`~tianshou.algorithm.MultiAgentPolicyManager`. The figure on the right gives an intuitive explanation. 
:: >>> from tianshou.data import Collector >>> from tianshou.env import DummyVectorEnv - >>> from tianshou.policy import RandomPolicy, MultiAgentPolicyManager + >>> from tianshou.algorithm import RandomPolicy, MultiAgentPolicyManager >>> >>> # agents should be wrapped into one policy, >>> # which is responsible for calling the acting agent correctly >>> # here we use two random agents - >>> policy = MultiAgentPolicyManager([RandomPolicy(), RandomPolicy()], env) + >>> policy = MultiAgentPolicyManager( + >>> [RandomPolicy(action_space=env.action_space), RandomPolicy(action_space=env.action_space)], env + >>> ) >>> >>> # need to vectorize the environment for the collector >>> env = DummyVectorEnv([lambda: env]) @@ -174,8 +178,8 @@ Tianshou already provides some builtin classes for multi-agent learning. You can Random agents perform badly. In the above game, although agent 2 wins finally, it is clear that a smart agent 1 would place an ``x`` at row 4 col 4 to win directly. -Train an MARL Agent -------------------- +Train one Agent against a random opponent +----------------------------------------- So let's start to train our Tic-Tac-Toe agent! First, import some required modules. :: @@ -194,15 +198,15 @@ So let's start to train our Tic-Tac-Toe agent! First, import some required modul from tianshou.data import Collector, VectorReplayBuffer from tianshou.env import DummyVectorEnv from tianshou.env.pettingzoo_env import PettingZooEnv - from tianshou.policy import ( + from tianshou.algorithm import ( BasePolicy, DQNPolicy, MultiAgentPolicyManager, - RandomPolicy, + MARLRandomPolicy, ) - from tianshou.trainer import offpolicy_trainer + from tianshou.trainer import OffpolicyTrainer from tianshou.utils import TensorboardLogger - from tianshou.utils.net.common import Net + from tianshou.utils.net.common import MLPActor The explanation of each Tianshou class/function will be deferred to their first usages. Here we define some arguments and hyperparameters of the experiment. The meaning of arguments is clear by just looking at their names. :: @@ -220,15 +224,15 @@ The explanation of each Tianshou class/function will be deferred to their first parser.add_argument('--n-step', type=int, default=3) parser.add_argument('--target-update-freq', type=int, default=320) parser.add_argument('--epoch', type=int, default=50) - parser.add_argument('--step-per-epoch', type=int, default=1000) - parser.add_argument('--step-per-collect', type=int, default=10) + parser.add_argument('--epoch_num_steps', type=int, default=1000) + parser.add_argument('--collection_step_num_env_steps', type=int, default=10) parser.add_argument('--update-per-step', type=float, default=0.1) - parser.add_argument('--batch-size', type=int, default=64) + parser.add_argument('--batch_size', type=int, default=64) parser.add_argument( '--hidden-sizes', type=int, nargs='*', default=[128, 128, 128, 128] ) - parser.add_argument('--training-num', type=int, default=10) - parser.add_argument('--test-num', type=int, default=10) + parser.add_argument('--num_train_envs', type=int, default=10) + parser.add_argument('--num_test_envs', type=int, default=10) parser.add_argument('--logdir', type=str, default='log') parser.add_argument('--render', type=float, default=0.1) parser.add_argument( @@ -280,11 +284,11 @@ The explanation of each Tianshou class/function will be deferred to their first The following ``get_agents`` function returns agents and their optimizers from either constructing a new policy, or loading from disk, or using the pass-in arguments. 
For the models: -- The action model we use is an instance of :class:`~tianshou.utils.net.common.Net`, essentially a multi-layer perceptron with the ReLU activation function; -- The network model is passed to a :class:`~tianshou.policy.DQNPolicy`, where actions are selected according to both the action mask and their Q-values; -- The opponent can be either a random agent :class:`~tianshou.policy.RandomPolicy` that randomly chooses an action from legal actions, or it can be a pre-trained :class:`~tianshou.policy.DQNPolicy` allowing learned agents to play with themselves. +- The action model we use is an instance of :class:`~tianshou.utils.net.common.MLPActor`, essentially a multi-layer perceptron with the ReLU activation function; +- The network model is passed to a :class:`~tianshou.algorithm.DQNPolicy`, where actions are selected according to both the action mask and their Q-values; +- The opponent can be either a random agent :class:`~tianshou.algorithm.MARLRandomPolicy` that randomly chooses an action from legal actions, or it can be a pre-trained :class:`~tianshou.algorithm.DQNPolicy` allowing learned agents to play with themselves. -Both agents are passed to :class:`~tianshou.policy.MultiAgentPolicyManager`, which is responsible to call the correct agent according to the ``agent_id`` in the observation. :class:`~tianshou.policy.MultiAgentPolicyManager` also dispatches data to each agent according to ``agent_id``, so that each agent seems to play with a virtual single-agent environment. +Both agents are passed to :class:`~tianshou.algorithm.MultiAgentPolicyManager`, which is responsible to call the correct agent according to the ``agent_id`` in the observation. :class:`~tianshou.algorithm.MultiAgentPolicyManager` also dispatches data to each agent according to ``agent_id``, so that each agent seems to play with a virtual single-agent environment. Here it is: :: @@ -303,7 +307,7 @@ Here it is: args.action_shape = env.action_space.shape or env.action_space.n if agent_learn is None: # model - net = Net( + net = MLPActor( args.state_shape, args.action_shape, hidden_sizes=args.hidden_sizes, @@ -312,10 +316,11 @@ Here it is: if optim is None: optim = torch.optim.Adam(net.parameters(), lr=args.lr) agent_learn = DQNPolicy( - net, - optim, - args.gamma, - args.n_step, + model=net, + optim=optim, + gamma=args.gamma, + action_space=env.action_space, + estimate_space=args.n_step, target_update_freq=args.target_update_freq ) if args.resume_path: @@ -326,7 +331,7 @@ Here it is: agent_opponent = deepcopy(agent_learn) agent_opponent.load_state_dict(torch.load(args.opponent_path)) else: - agent_opponent = RandomPolicy() + agent_opponent = RandomPolicy(action_space=env.action_space) if args.agent_id == 1: agents = [agent_learn, agent_opponent] @@ -351,8 +356,8 @@ With the above preparation, we are close to the first learned agent. The followi ) -> Tuple[dict, BasePolicy]: # ======== environment setup ========= - train_envs = DummyVectorEnv([get_env for _ in range(args.training_num)]) - test_envs = DummyVectorEnv([get_env for _ in range(args.test_num)]) + train_envs = DummyVectorEnv([get_env for _ in range(args.num_train_envs)]) + test_envs = DummyVectorEnv([get_env for _ in range(args.num_test_envs)]) # seed np.random.seed(args.seed) torch.manual_seed(args.seed) @@ -373,7 +378,7 @@ With the above preparation, we are close to the first learned agent. 
The followi ) test_collector = Collector(policy, test_envs, exploration_noise=True) # policy.set_eps(1) - train_collector.collect(n_step=args.batch_size * args.training_num) + train_collector.collect(n_step=args.batch_size * args.num_train_envs) # ======== tensorboard logging setup ========= log_path = os.path.join(args.logdir, 'tic_tac_toe', 'dqn') @@ -406,14 +411,14 @@ With the above preparation, we are close to the first learned agent. The followi return rews[:, args.agent_id - 1] # trainer - result = offpolicy_trainer( + result = OffpolicyTrainer( policy, train_collector, test_collector, args.epoch, - args.step_per_epoch, - args.step_per_collect, - args.test_num, + args.epoch_num_steps, + args.collection_step_num_env_steps, + args.num_test_envs, args.batch_size, train_fn=train_fn, test_fn=test_fn, @@ -423,7 +428,7 @@ With the above preparation, we are close to the first learned agent. The followi logger=logger, test_in_train=False, reward_metric=reward_metric - ) + ).run() return result, policy.policies[agents[args.agent_id - 1]] @@ -537,7 +542,7 @@ That's it. By executing the code, you will see a progress bar indicating the pro

-Notice that, our learned agent plays the role of agent 2, placing ``o`` on the board. The agent performs pretty well against the random opponent! It learns the rule of the game by trial and error, and learns that four consecutive ``o`` means winning, so it does! +Notice that, our learned agent plays the role of agent 2, placing ``o`` on the board. The agent performs pretty well against the random opponent! It learns the rule of the game by trial and error, and learns that three consecutive ``o`` means winning, so it does! The above code can be executed in a python shell or can be saved as a script file (we have saved it in ``test/pettingzoo/test_tic_tac_toe.py``). In the latter case, you can train an agent by @@ -642,4 +647,4 @@ Well, although the learned agent plays well against the random agent, it is far Next, maybe you can try to build more intelligent agents by letting the agent learn from self-play, just like AlphaZero! -In this tutorial, we show an example of how to use Tianshou for multi-agent RL. Tianshou is a flexible and easy to use RL library. Make the best of Tianshou by yourself! +In this tutorial, we show an example of how to use Tianshou for training a single agent in a MARL setting. Tianshou is a flexible and easy to use RL library. Make the best of Tianshou by yourself! diff --git a/docs/tutorials/logger.rst b/docs/01_tutorials/05_logger.rst similarity index 100% rename from docs/tutorials/logger.rst rename to docs/01_tutorials/05_logger.rst diff --git a/docs/tutorials/benchmark.rst b/docs/01_tutorials/06_benchmark.rst similarity index 98% rename from docs/tutorials/benchmark.rst rename to docs/01_tutorials/06_benchmark.rst index b67ccbfd7..26f80eb2b 100644 --- a/docs/tutorials/benchmark.rst +++ b/docs/01_tutorials/06_benchmark.rst @@ -12,7 +12,7 @@ Every experiment is conducted under 10 random seeds for 1-10M steps. Please refe .. raw:: html
- +

@@ -101,7 +101,7 @@ Every experiment is conducted under 10 random seeds for 10M steps. Please refer .. raw:: html
- +

diff --git a/docs/tutorials/cheatsheet.rst b/docs/01_tutorials/07_cheatsheet.rst similarity index 97% rename from docs/tutorials/cheatsheet.rst rename to docs/01_tutorials/07_cheatsheet.rst index 7a02f2b72..fc747d66f 100644 --- a/docs/tutorials/cheatsheet.rst +++ b/docs/01_tutorials/07_cheatsheet.rst @@ -1,6 +1,8 @@ Cheat Sheet =========== +**IMPORTANT**: The content here has not yet been adjusted to the v2 version of Tianshou. It is partially outdated and will be updated soon. + This page shows some code snippets of how to use Tianshou to develop new algorithms / apply algorithms to new scenarios. @@ -23,7 +25,7 @@ See :ref:`build_the_network`. Build New Policy ---------------- -See :class:`~tianshou.policy.BasePolicy`. +See :class:`~tianshou.algorithm.BasePolicy`. .. _eval_policy: @@ -126,7 +128,7 @@ The figure in the right gives an intuitive comparison among synchronous/asynchro .. note:: The async simulation collector would cause some exceptions when used as - ``test_collector`` in :doc:`/api/tianshou.trainer` (related to + ``test_collector`` in :doc:`/03_api/trainer/index` (related to `Issue 700 `_). Please use sync version for ``test_collector`` instead. @@ -159,7 +161,7 @@ toy_text and classic_control environments. For more information, please refer to # install envpool: pip3 install envpool import envpool - envs = envpool.make_gymnasium("CartPole-v0", num_envs=10) + envs = envpool.make_gymnasium("CartPole-v1", num_envs=10) collector = Collector(policy, envs, buffer) Here are some other `examples `_. @@ -283,12 +285,12 @@ Multi-GPU Training To enable training an RL agent with multiple GPUs for a standard environment (i.e., without nested observation) with default networks provided by Tianshou: 1. Import :class:`~tianshou.utils.net.common.DataParallelNet` from ``tianshou.utils.net.common``; -2. Change the ``device`` argument to ``None`` in the existing networks such as ``Net``, ``Actor``, ``Critic``, ``ActorProb`` +2. Change the ``device`` argument to ``None`` in the existing networks such as ``MLPActor``, ``Actor``, ``Critic``, ``ActorProb`` 3. Apply ``DataParallelNet`` wrapper to these networks. :: - from tianshou.utils.net.common import Net, DataParallelNet + from tianshou.utils.net.common import MLPActor, DataParallelNet from tianshou.utils.net.discrete import Actor, Critic actor = DataParallelNet(Actor(net, args.action_shape, device=None).to(args.device)) @@ -478,4 +480,4 @@ By constructing a new state ``state_ = (state, agent_id, mask)``, essentially we act = policy(state_) next_state_, reward = env.step(act) -Following this idea, we write a tiny example of playing `Tic Tac Toe `_ against a random player by using a Q-learning algorithm. The tutorial is at :doc:`/tutorials/tictactoe`. +Following this idea, we write a tiny example of playing `Tic Tac Toe `_ against a random player by using a Q-learning algorithm. The tutorial is at :doc:`/01_tutorials/04_tictactoe`. diff --git a/docs/01_tutorials/index.rst b/docs/01_tutorials/index.rst new file mode 100644 index 000000000..f08b66f9f --- /dev/null +++ b/docs/01_tutorials/index.rst @@ -0,0 +1,2 @@ +Tutorials +========= \ No newline at end of file diff --git a/docs/02_notebooks/0_intro.md b/docs/02_notebooks/0_intro.md new file mode 100644 index 000000000..e68b36e63 --- /dev/null +++ b/docs/02_notebooks/0_intro.md @@ -0,0 +1,9 @@ +# Notebook Tutorials + +Here is a collection of executable tutorials for Tianshou. You can run them +directly in colab, or download them and run them locally. 
+ +They will guide you step by step to show you how the most basic modules in Tianshou +work and how they collaborate with each other to conduct a classic DRL experiment. + +**IMPORTANT**: The notebooks are not yet adjusted to the v2 version of Tianshou! Their content is partly outdated and will be updated soon. diff --git a/docs/02_notebooks/L1_Batch.ipynb b/docs/02_notebooks/L1_Batch.ipynb new file mode 100644 index 000000000..d40869287 --- /dev/null +++ b/docs/02_notebooks/L1_Batch.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "69y6AHvq1S3f" + }, + "source": [ + "# Batch\n", + "In this tutorial, we will introduce the **Batch** to you, which serves as the fundamental data structure in Tianshou. Think of Batch as a numpy-enhanced version of a Python dictionary. It is also similar to pytorch's tensordict,\n", + "although with a somehow different type structure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "editable": true, + "id": "NkfiIe_y2FI-", + "outputId": "5008275f-8f77-489a-af64-b35af4448589", + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-output", + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "import numpy as np\n", + "import torch\n", + "\n", + "from tianshou.data import Batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data = Batch(a=4, b=[5, 5], c=\"2312312\", d=(\"a\", -2, -3))\n", + "print(data)\n", + "print(data.b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S6e6OuXe3UT-" + }, + "source": [ + "A batch stores all passed in data as key-value pairs, and automatically turns the value into a numpy array if possible.\n", + "\n", + "## Why do we need Batch in Tianshou?\n", + "The motivation behind the implementation of Batch module is simple. In DRL, you need to handle a lot of dictionary-format data. For instance, most algorithms would require you to store state, action, and reward data for every step when interacting with the environment. All of them can be organized as a dictionary, and the\n", + " Batch class helps Tianshou in unifying the interfaces of a diverse set of algorithms. In addition, Batch supports advanced indexing, concatenation, and splitting, as well as printing formatted outputs akin to standard numpy arrays, proving invaluable for developers.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_Xenx64M9HhV" + }, + "source": [ + "## Basic Usages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4YGX_f1Z9Uil" + }, + "source": [ + "### Initialization\n", + "Batch can be constructed directly from a python dictionary, and all data structures\n", + " will be converted to numpy arrays if possible." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Jl3-4BRbp3MM", + "outputId": "a8b225f6-2893-4716-c694-3c2ff558b7f0" + }, + "outputs": [], + "source": [ + "# converted from a python library\n", + "print(\"========================================\")\n", + "batch1 = Batch({\"a\": [4, 4], \"b\": (5, 5)})\n", + "print(batch1)\n", + "\n", + "# initialization of batch2 is equivalent to batch1\n", + "print(\"========================================\")\n", + "batch2 = Batch(a=[4, 4], b=(5, 5))\n", + "print(batch2)\n", + "\n", + "# the dictionary can be nested, and it will be turned into a nested Batch\n", + "print(\"========================================\")\n", + "data = {\n", + " \"action\": np.array([1.0, 2.0, 3.0]),\n", + " \"reward\": 3.66,\n", + " \"obs\": {\n", + " \"rgb_obs\": np.zeros((3, 3)),\n", + " \"flatten_obs\": np.ones(5),\n", + " },\n", + "}\n", + "\n", + "batch3 = Batch(data, extra=\"extra_string\")\n", + "print(batch3)\n", + "# batch3.obs is also a Batch\n", + "print(type(batch3.obs))\n", + "print(batch3.obs.rgb_obs)\n", + "\n", + "# a list of dictionary/Batch will automatically be concatenated/stacked, providing convenience if you\n", + "# want to use parallelized environments to collect data.\n", + "print(\"========================================\")\n", + "batch4 = Batch([data] * 3)\n", + "print(batch4)\n", + "print(batch4.obs.rgb_obs.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JCf6bqY3uf5L" + }, + "source": [ + "### Getting access to data\n", + "You can effortlessly search for or modify key-value pairs within a Batch, much like interacting with a Python dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2TNIY90-vU9b", + "outputId": "de52ffe9-03c2-45f2-d95a-4071132daa4a" + }, + "outputs": [], + "source": [ + "batch1 = Batch({\"a\": [4, 4], \"b\": (5, 5)})\n", + "print(batch1)\n", + "\n", + "# add or delete key-value pair in batch1\n", + "print(\"========================================\")\n", + "batch1.c = Batch(c1=np.arange(3), c2=False)\n", + "del batch1.a\n", + "print(batch1)\n", + "\n", + "# access value by key\n", + "print(\"========================================\")\n", + "assert batch1[\"c\"] is batch1.c\n", + "print(\"c\" in batch1)\n", + "\n", + "# traverse the Batch\n", + "print(\"========================================\")\n", + "for key, value in batch1.items():\n", + " print(str(key) + \": \" + str(value))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bVywStbV9jD2" + }, + "source": [ + "### Indexing and Slicing\n", + "If all values in Batch share the same shape in certain dimensions, Batch can support array-like indexing and slicing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gKza3OJnzc_D", + "outputId": "4f240bfe-4a69-4c1b-b40e-983c5c4d0cbc" + }, + "outputs": [], + "source": [ + "# Let us suppose we have collected the data from stepping from 4 environments\n", + "step_outputs = [\n", + " {\n", + " \"act\": np.random.randint(10),\n", + " \"rew\": 0.0,\n", + " \"obs\": np.ones((3, 3)),\n", + " \"info\": {\"done\": np.random.choice(2), \"failed\": False},\n", + " \"terminated\": False,\n", + " \"truncated\": False,\n", + " }\n", + " for _ in range(4)\n", + "]\n", + "batch = Batch(step_outputs)\n", + "print(batch)\n", + "print(batch.shape)\n", + "\n", + "# advanced indexing is supported, if we only want to select data in a given set of environments\n", + "print(\"========================================\")\n", + "print(batch[0])\n", + "print(batch[[0, 3]])\n", + "\n", + "# slicing is also supported\n", + "print(\"========================================\")\n", + "print(batch[-2:])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Aggregation and Splitting\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1vUwQ-Hw9jtu" + }, + "source": [ + "Again, just like a numpy array. Play the example code below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "f5UkReyn3_kb", + "outputId": "e7bb3324-7f20-4810-a328-479117efca55" + }, + "outputs": [], + "source": [ + "# concat batches with compatible keys\n", + "# try incompatible keys yourself if you feel curious\n", + "print(\"========================================\")\n", + "b1 = Batch(a=[{\"b\": np.float64(1.0), \"d\": Batch(e=np.array(3.0))}])\n", + "b2 = Batch(a=[{\"b\": np.float64(4.0), \"d\": {\"e\": np.array(6.0)}}])\n", + "b12_cat_out = Batch.cat([b1, b2])\n", + "print(b1)\n", + "print(b2)\n", + "print(b12_cat_out)\n", + "\n", + "# stack batches with compatible keys\n", + "# try incompatible keys yourself if you feel curious\n", + "print(\"========================================\")\n", + "b3 = Batch(a=np.zeros((3, 2)), b=np.ones((2, 3)), c=Batch(d=[[1], [2]]))\n", + "b4 = Batch(a=np.ones((3, 2)), b=np.ones((2, 3)), c=Batch(d=[[0], [3]]))\n", + "b34_stack = Batch.stack((b3, b4), axis=1)\n", + "print(b3)\n", + "print(b4)\n", + "print(b34_stack)\n", + "\n", + "# split the batch into small batches of size 1, breaking the order of the data\n", + "print(\"========================================\")\n", + "print(type(b34_stack.split(1)))\n", + "print(list(b34_stack.split(1, shuffle=True)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Smc_W1Cx6zRS" + }, + "source": [ + "### Data type converting\n", + "Besides numpy array, Batch actually also supports Torch Tensor. The usages are exactly the same. Cool, isn't it?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Y6im_Mtb7Ody", + "outputId": "898e82c4-b940-4c35-a0f9-dedc4a9bc500" + }, + "outputs": [], + "source": [ + "batch1 = Batch(a=np.arange(2), b=torch.zeros((2, 2)))\n", + "batch2 = Batch(a=np.arange(2), b=torch.ones((2, 2)))\n", + "batch_cat = Batch.cat([batch1, batch2, batch1])\n", + "print(batch_cat)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1wfTUVKb6xki" + }, + "source": [ + "You can convert the data type easily, if you no longer want to use hybrid data type anymore." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "F7WknVs98DHD", + "outputId": "cfd0712a-1df3-4208-e6cc-9149840bdc40" + }, + "outputs": [], + "source": [ + "batch_cat.to_numpy_()\n", + "print(batch_cat)\n", + "batch_cat.to_torch_()\n", + "print(batch_cat)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NTFVle1-9Biz" + }, + "source": [ + "Batch is even serializable, just in case you may need to save it to disk or restore it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Lnf17OXv9YRb", + "outputId": "753753f2-3f66-4d4b-b4ff-d57f9c40d1da" + }, + "outputs": [], + "source": [ + "batch = Batch(obs=Batch(a=0.0, c=torch.Tensor([1.0, 2.0])), np=np.zeros([3, 4]))\n", + "batch_pk = pickle.loads(pickle.dumps(batch))\n", + "print(batch_pk)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-vPMiPZ-9kJN" + }, + "source": [ + "## Further Reading" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8Oc1p8ud9kcu" + }, + "source": [ + "Would you like to learn more advanced usages of Batch? Feel curious about how data is organized inside the Batch? Check the [documentation](https://tianshou.readthedocs.io/en/master/03_api/data/batch.html) and other [tutorials](https://tianshou.readthedocs.io/en/master/01_tutorials/03_batch.html#) for more details." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/02_notebooks/L2_Buffer.ipynb b/docs/02_notebooks/L2_Buffer.ipynb new file mode 100644 index 000000000..4f51abca5 --- /dev/null +++ b/docs/02_notebooks/L2_Buffer.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "\n", + "import numpy as np\n", + "\n", + "from tianshou.data import Batch, ReplayBuffer" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xoPiGVD8LNma" + }, + "source": [ + "# Buffer\n", + "Replay Buffer is a very common module in DRL implementations. In Tianshou, the Buffer module can be viewed as a specialized form of Batch, designed to track all data trajectories and offering utilities like sampling methods beyond basic storage.\n", + "\n", + "There are many kinds of Buffer modules in Tianshou, two most basic ones are ReplayBuffer and VectorReplayBuffer. 
The later one is specially designed for parallelized environments (will introduce in tutorial [Vectorized Environment](https://tianshou.readthedocs.io/en/master/02_notebooks/L3_Vectorized__Environment.html)). In this tutorial, we will focus on ReplayBuffer." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OdesCAxANehZ" + }, + "source": [ + "## Usages" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fUbLl9T_SrTR" + }, + "source": [ + "### Basic usages as a batch\n", + "Typically, a buffer stores all data in batches, employing a circular-queue mechanism." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mocZ6IqZTH62", + "outputId": "66cc4181-c51b-4a47-aacf-666b92b7fc52" + }, + "outputs": [], + "source": [ + "# a buffer is initialised with its maxsize set to 10 (older data will be discarded if more data flow in).\n", + "print(\"========================================\")\n", + "dummy_buf = ReplayBuffer(size=10)\n", + "print(dummy_buf)\n", + "print(f\"maxsize: {dummy_buf.maxsize}, data length: {len(dummy_buf)}\")\n", + "\n", + "# add 3 steps of data into ReplayBuffer sequentially\n", + "print(\"========================================\")\n", + "for i in range(3):\n", + " dummy_buf.add(\n", + " Batch(obs=i, act=i, rew=i, terminated=0, truncated=0, done=0, obs_next=i + 1, info={}),\n", + " )\n", + "print(dummy_buf)\n", + "print(f\"maxsize: {dummy_buf.maxsize}, data length: {len(dummy_buf)}\")\n", + "\n", + "# add another 10 steps of data into ReplayBuffer sequentially\n", + "print(\"========================================\")\n", + "for i in range(3, 13):\n", + " dummy_buf.add(\n", + " Batch(obs=i, act=i, rew=i, terminated=0, truncated=0, done=0, obs_next=i + 1, info={}),\n", + " )\n", + "print(dummy_buf)\n", + "print(f\"maxsize: {dummy_buf.maxsize}, data length: {len(dummy_buf)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "H8B85Y5yUfTy" + }, + "source": [ + "Just like Batch, ReplayBuffer supports concatenation, splitting, advanced slicing and indexing, etc." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cOX-ADOPNeEK", + "outputId": "f1a8ec01-b878-419b-f180-bdce3dee73e6" + }, + "outputs": [], + "source": [ + "print(dummy_buf[-1])\n", + "print(dummy_buf[-3:])\n", + "# Try more methods you find useful in Batch yourself." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vqldap-2WQBh" + }, + "source": [ + "ReplayBuffer can also be saved into local disk, still keeping track of the trajectories. This is extremely helpful in offline DRL settings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ppx0L3niNT5K" + }, + "outputs": [], + "source": [ + "_dummy_buf = pickle.loads(pickle.dumps(dummy_buf))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eqezp0OyXn6J" + }, + "source": [ + "### Understanding reserved keys for buffer\n", + "As explained above, ReplayBuffer is specially designed to utilize the implementations of DRL algorithms. 
So, for convenience, we reserve certain nine reserved keys in Batch.\n", + "\n", + "* `obs`\n", + "* `act`\n", + "* `rew`\n", + "* `terminated`\n", + "* `truncated`\n", + "* `done`\n", + "* `obs_next`\n", + "* `info`\n", + "* `policy`\n", + "\n", + "The meaning of these nine reserved keys are consistent with the meaning in [Gymansium](https://gymnasium.farama.org/index.html#). We would recommend you simply use these nine keys when adding batched data into ReplayBuffer, because\n", + "some of them are tracked in ReplayBuffer (e.g. \"done\" value is tracked to help us determine a trajectory's start index and end index, together with its total reward and episode length.)\n", + "\n", + "```\n", + "buf.add(Batch(......, extro_info=0)) # This is okay but not recommended.\n", + "buf.add(Batch(......, info={\"extro_info\":0})) # Recommended.\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ueAbTspsc6jo" + }, + "source": [ + "### Data sampling\n", + "The primary purpose of maintaining a replay buffer in DRL is to sample data for training. `ReplayBuffer.sample()` and `ReplayBuffer.split(..., shuffle=True)` can both fulfill this need." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "P5xnYOhrchDl", + "outputId": "bcd2c970-efa6-43bb-8709-720d38f77bbd" + }, + "outputs": [], + "source": [ + "dummy_buf.sample(batch_size=5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IWyaOSKOcgK4" + }, + "source": [ + "## Trajectory tracking\n", + "Compared to Batch, a unique feature of ReplayBuffer is that it can help you track the environment trajectories.\n", + "\n", + "First, let us simulate a situation, where we add three trajectories into the buffer. The last trajectory is still not finished yet." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "editable": true, + "id": "H0qRb6HLfhLB", + "outputId": "9bdb7d4e-b6ec-489f-a221-0bddf706d85b", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "trajectory_buffer = ReplayBuffer(size=10)\n", + "# Add the first trajectory (length is 3) into ReplayBuffer\n", + "print(\"========================================\")\n", + "for i in range(3):\n", + " result = trajectory_buffer.add(\n", + " Batch(\n", + " obs=i,\n", + " act=i,\n", + " rew=i,\n", + " terminated=1 if i == 2 else 0,\n", + " truncated=0,\n", + " done=i == 2,\n", + " obs_next=i + 1,\n", + " info={},\n", + " ),\n", + " )\n", + " print(result)\n", + "print(trajectory_buffer)\n", + "print(f\"maxsize: {trajectory_buffer.maxsize}, data length: {len(trajectory_buffer)}\")\n", + "\n", + "# Add the second trajectory (length is 5) into ReplayBuffer\n", + "print(\"========================================\")\n", + "for i in range(3, 8):\n", + " result = trajectory_buffer.add(\n", + " Batch(\n", + " obs=i,\n", + " act=i,\n", + " rew=i,\n", + " terminated=1 if i == 7 else 0,\n", + " truncated=0,\n", + " done=i == 7,\n", + " obs_next=i + 1,\n", + " info={},\n", + " ),\n", + " )\n", + " print(result)\n", + "print(trajectory_buffer)\n", + "print(f\"maxsize: {trajectory_buffer.maxsize}, data length: {len(trajectory_buffer)}\")\n", + "\n", + "# Add the third trajectory (length is 5, still not finished) into ReplayBuffer\n", + "print(\"========================================\")\n", + "for i in range(8, 13):\n", + " result = trajectory_buffer.add(\n", + " Batch(obs=i, act=i, rew=i, terminated=0, truncated=0, done=False, obs_next=i + 1, info={}),\n", + " )\n", + " print(result)\n", + "print(trajectory_buffer)\n", + "print(f\"maxsize: {trajectory_buffer.maxsize}, data length: {len(trajectory_buffer)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dO7PWdb_hkXA" + }, + "source": [ + "### Episode length and rewards tracking\n", + "Notice that `ReplayBuffer.add()` returns a tuple of 4 numbers every time, meaning `(current_index, episode_reward, episode_length, episode_start_index)`. `episode_reward` and `episode_length` are valid only when a trajectory is finished. This might save developers some trouble.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xbVc90z8itH0" + }, + "source": [ + "### Episode index management\n", + "In the ReplayBuffer above, we can get access to any data step by indexing.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4mKwo54MjupY", + "outputId": "9ae14a7e-908b-44eb-afec-89b45bac5961" + }, + "outputs": [], + "source": [ + "print(trajectory_buffer)\n", + "print(\"========================================\")\n", + "\n", + "data = trajectory_buffer[6]\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5Co_Fmzj8Sw" + }, + "source": [ + "We know that step \"6\" is not the start of an episode - which should be step \"3\", since \"3-7\" is the second trajectory we add into the ReplayBuffer - but we wonder how do we get the earliest index of that episode.\n", + "\n", + "This may seem easy but actually it is not. 
We cannot simply look at the \"done\" flag preceding the start of a new episode, because since the third-added trajectory is not finished yet, step \"3\" is surrounded by flag \"False\". There are many things to consider. Things could get more nasty when using more advanced ReplayBuffer like VectorReplayBuffer, since it does not store the data in a simple circular-queue.\n", + "\n", + "Luckily, all ReplayBuffer instances help you identify step indexes through a unified API. One can simply input an array of indexes and look for their previous index in the episode." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# previous step of indexes [0, 1, 2, 3, 4, 5, 6] are:\n", + "print(trajectory_buffer.prev(np.array([0, 1, 2, 3, 4, 5, 6])))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Wlb57V4lQyQ" + }, + "source": [ + "Using `ReplayBuffer.prev()`, we know that the earliest step of that episode is step \"3\". Similarly, `ReplayBuffer.next()` helps us identify the last index of an episode regardless of which kind of ReplayBuffer we are using." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zl5TRMo7oOy5", + "outputId": "4a11612c-3ee0-4e74-b028-c8759e71fbdb" + }, + "outputs": [], + "source": [ + "# next step of indexes [4,5,6,7,8,9] are:\n", + "print(trajectory_buffer.next(np.array([4, 5, 6, 7, 8, 9])))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YJ9CcWZXoOXw" + }, + "source": [ + "We can also search for the indexes which are labeled \"done: False\", but are the last step in a trajectory." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xkawk97NpItg", + "outputId": "df10b359-c2c7-42ca-e50d-9caee6bccadd" + }, + "outputs": [], + "source": [ + "print(trajectory_buffer.unfinished_index())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8_lMr0j3pOmn" + }, + "source": [ + "Aforementioned APIs will be helpful when we calculate quantities like GAE and n-step-returns in DRL algorithms ([Example usage in Tianshou](https://github.com/thu-ml/tianshou/blob/6fc68578127387522424460790cbcb32a2bd43c4/tianshou/policy/base.py#L384)). The unified APIs ensure a modular design and a flexible interface." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FEyE0c7tNfwa" + }, + "source": [ + "## Further Reading\n", + "### Other Buffer Module\n", + "\n", + "* PrioritizedReplayBuffer, which helps you implement [prioritized experience replay](https://arxiv.org/abs/1511.05952)\n", + "* CachedReplayBuffer, one main buffer with several cached buffers (higher sample efficiency in some scenarios)\n", + "* ReplayBufferManager, A base class that can be inherited (may help you manage multiple buffers).\n", + "\n", + "Refer to the documentation and source code for further details.\n", + "\n", + "### Support for steps stacking to use RNN in DRL.\n", + "There is an option called `stack_num` (default to 1) when initializing the ReplayBuffer, which may help you use RNN in your algorithm. Check the documentation for details." 
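To make the `stack_num` option above more concrete, here is a minimal sketch (the exact stacking axis and the way frames are repeated at episode boundaries may differ between Tianshou versions, so treat the printed values as illustrative):

```python
import numpy as np

from tianshou.data import Batch, ReplayBuffer

# with stack_num=4, indexing the buffer returns the last 4 observations ending at that step,
# which is convenient as input for a recurrent network
stacked_buf = ReplayBuffer(size=20, stack_num=4)
for i in range(10):
    stacked_buf.add(
        Batch(obs=np.array([i]), act=i, rew=i, terminated=False, truncated=False,
              done=False, obs_next=np.array([i + 1]), info={}),
    )
print(stacked_buf[5].obs)  # stacked observations ending at step 5, e.g. [[2], [3], [4], [5]]
```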
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/02_notebooks/L3_Vectorized__Environment.ipynb b/docs/02_notebooks/L3_Vectorized__Environment.ipynb new file mode 100644 index 000000000..19e5489a2 --- /dev/null +++ b/docs/02_notebooks/L3_Vectorized__Environment.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "W5V7z3fVX7_b" + }, + "source": [ + "# Vectorized Environment\n", + "In reinforcement learning, an agent engages with environments to enhance its performance. In this tutorial we will concentrate on the environment part. Although there are many kinds of environments or their libraries in DRL research, Tianshou chooses to keep a consistent API with [OPENAI Gym](https://gym.openai.com/).\n", + "\n", + "
\n", + "\n", + "\n", + " The agents interacting with the environment \n", + "
\n", + "\n", + "In Gym, an environment receives an action and returns next observation and reward. This process is slow and sometimes can be the throughput bottleneck in a DRL experiment.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A0NGWZ8adBwt" + }, + "source": [ + "Tianshou provides vectorized environment wrapper for a Gym environment. This wrapper allows you to make use of multiple cpu cores in your server to accelerate the data sampling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "editable": true, + "id": "67wKtkiNi3lb", + "outputId": "1e04353b-7a91-4c32-e2ae-f3889d58aa5e", + "slideshow": { + "slide_type": "" + }, + "tags": [ + "remove-output", + "hide-cell" + ] + }, + "outputs": [], + "source": [ + "import time\n", + "\n", + "import gymnasium as gym\n", + "import numpy as np\n", + "\n", + "from tianshou.env import DummyVectorEnv, SubprocVectorEnv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "num_cpus = [1, 2, 5]\n", + "for num_cpu in num_cpus:\n", + " env = SubprocVectorEnv([lambda: gym.make(\"CartPole-v1\") for _ in range(num_cpu)])\n", + " env.reset()\n", + " sampled_steps = 0\n", + " time_start = time.time()\n", + " while sampled_steps < 1000:\n", + " act = np.random.choice(2, size=num_cpu)\n", + " obs, rew, terminated, truncated, info = env.step(act)\n", + " if np.sum(terminated):\n", + " env.reset(np.where(terminated)[0])\n", + " sampled_steps += num_cpu\n", + " time_used = time.time() - time_start\n", + " print(f\"{time_used}s used to sample 1000 steps if using {num_cpu} cpus.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S1b6vxp9nEUS" + }, + "source": [ + "You may notice that the speed doesn't increase linearly when we add subprocess numbers. There are multiple reasons behind this. One reason is that synchronize exception causes straggler effect. One way to solve this would be to use asynchronous mode. We leave this for further reading if you feel interested.\n", + "\n", + "Note that SubprocVectorEnv should only be used when the environment execution is slow. In practice, DummyVectorEnv (or raw Gym environment) is actually more efficient for a simple environment like CartPole because now you avoid both straggler effect and the overhead of communication between subprocesses." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z6yPxdqFp18j" + }, + "source": [ + "## Usages\n", + "### Initialization\n", + "Just pass in a list of functions which return the initialized environment upon called." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssLcrL_pq24-" + }, + "outputs": [], + "source": [ + "# In Gym\n", + "gym_env = gym.make(\"CartPole-v1\")\n", + "\n", + "\n", + "# In Tianshou\n", + "def create_cartpole_env() -> gym.Env:\n", + " return gym.make(\"CartPole-v1\")\n", + "\n", + "\n", + "# We can distribute the environments on the available cpus, which we assume to be 5 in this case\n", + "vector_env = DummyVectorEnv([create_cartpole_env for _ in range(5)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X7p8csjdrwIN" + }, + "source": [ + "### EnvPool supporting\n", + "Besides integrated environment wrappers, Tianshou also fully supports [EnvPool](https://github.com/sail-sg/envpool/). Explore its Github page yourself." 
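A minimal sketch of the EnvPool route (this assumes `pip install envpool` and a platform supported by EnvPool; the reset/step interface follows the gymnasium convention):

```python
import envpool

# an EnvPool-based vector env can be used in place of DummyVectorEnv/SubprocVectorEnv,
# e.g. it can be passed to a tianshou Collector just like the wrappers above
envs = envpool.make_gymnasium("CartPole-v1", num_envs=5)
obs, info = envs.reset()
print(obs.shape)  # (5, 4): one CartPole observation per parallel environment
```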
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kvIfqh0vqAR5" + }, + "source": [ + "### Environment execution and resetting\n", + "The only difference between Vectorized environments and standard Gym environments is that passed in actions and returned rewards/observations are also vectorized." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BH1ZnPG6tkdD" + }, + "outputs": [], + "source": [ + "# In gymnasium, env.reset() returns an observation, info tuple\n", + "print(\"In Gym, env.reset() returns a single observation.\")\n", + "print(gym_env.reset())\n", + "\n", + "# In Tianshou, envs.reset() returns stacked observations.\n", + "print(\"========================================\")\n", + "print(\"In Tianshou, a VectorEnv's reset() returns stacked observations.\")\n", + "print(vector_env.reset())\n", + "\n", + "info = vector_env.step(np.random.choice(2, size=vector_env.env_num))[4]\n", + "print(info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qXroB7KluvP9" + }, + "source": [ + "If we only want to execute several environments. The `id` argument can be used." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ufvFViKTu8d_" + }, + "outputs": [], + "source": [ + "info = vector_env.step(np.random.choice(2, size=3), id=[0, 3, 1])[4]\n", + "print(info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fekHR1a6X_HB" + }, + "source": [ + "## Further Reading\n", + "### Other environment wrappers in Tianshou\n", + "\n", + "\n", + "* ShmemVectorEnv: use share memory instead of pipe based on SubprocVectorEnv;\n", + "* RayVectorEnv: use Ray for concurrent activities and is currently the only choice for parallel simulation in a cluster with multiple machines.\n", + "\n", + "Check the [documentation](https://tianshou.org/en/master/03_api/env/venvs.html) for details.\n", + "\n", + "### Difference between synchronous and asynchronous mode (How to choose?)\n", + "For further insights, refer to the [Parallel Sampling](https://tianshou.org/en/master/01_tutorials/07_cheatsheet.html#parallel-sampling) tutorial." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/02_notebooks/L4_GAE.ipynb b/docs/02_notebooks/L4_GAE.ipynb new file mode 100644 index 000000000..8393d6f92 --- /dev/null +++ b/docs/02_notebooks/L4_GAE.ipynb @@ -0,0 +1,265 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QJ5krjrcbuiA" + }, + "source": [ + "# Notes on Generalized Advantage Estimation\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UPVl5LBEWJ0t" + }, + "source": [ + "## How to compute GAE on your own?\n", + "(Note that for this reading you need to understand the calculation of [GAE](https://arxiv.org/abs/1506.02438) advantage first)\n", + "\n", + "In terms of code implementation, perhaps the most difficult and annoying part is computing GAE advantage. Just now, we use the `self.compute_episodic_return()` method inherited from `BasePolicy` to save us from all those troubles. 
However, it is still important that we know the details behind this.\n", + "\n", + "To compute GAE advantage, the usage of `self.compute_episodic_return()` may go like:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "D34GlVvPNz08", + "outputId": "43a4e5df-59b5-4e4a-c61c-e69090810215" + }, + "source": [ + "```python\n", + "batch, indices = dummy_buffer.sample(0) # 0 means sampling all the data from the buffer\n", + "returns, advantage = Algorithm.compute_episodic_return(\n", + " batch=batch,\n", + " buffer=dummy_buffer,\n", + " indices=indices,\n", + " v_s_=np.zeros(10),\n", + " v_s=np.zeros(10),\n", + " gamma=1.0,\n", + " gae_lambda=1.0,\n", + ")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the code above, we sample all the 10 data in the buffer and try to compute the GAE advantage. However, the way the returns are computed here might be a bit misleading. In fact, the last episode is unfinished, but its last step saved in the batch is treated as a terminal state, since it assumes that there are no future rewards. The episode is not terminated yet, it is truncated, so the agent could still get rewards in the future. Terminated and truncated episodes should indeed be treated differently.\n", + "The return of a step is the (discounted) sum of the future rewards from that step until the end of the episode. \n", + "\\begin{equation}\n", + "R_{t}=\\sum_{t}^{T} \\gamma^{t} r_{t}\n", + "\\end{equation}\n", + "Thus, at the last step of a terminated episode the return is equal to the reward at that state, since there are no future states.\n", + "\\begin{equation}\n", + "R_{T,terminated}=r_{T}\n", + "\\end{equation}\n", + "\n", + "However, if the episode was truncated the return at the last step is usually better represented by the estimated value of that state, which is the expected return from that state onwards.\n", + "\\begin{align*}\n", + "R_{T,truncated}=V^{\\pi}\\left(s_{T}\\right) \\quad & \\text{or} \\quad R_{T,truncated}=Q^{\\pi}(s_{T},a_{T})\n", + "\\end{align*}\n", + "Moreover, if the next state was also observed (but not its reward), then an even better estimate would be the reward of the last step plus the discounted value of the next state.\n", + "\\begin{align*}\n", + "R_{T,truncated}=r_T+\\gamma V^{\\pi}\\left(s_{T+1}\\right)\n", + "\\end{align*}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h_5Dt6XwQLXV" + }, + "source": [ + "\n", + "As we know, we need to estimate the value function of every observation to compute GAE advantage. So in `v_s` is the value of `batch.obs`, and in `v_s_` is the value of `batch.obs_next`. This is usually computed by:\n", + "\n", + "`v_s = critic(batch.obs)`,\n", + "\n", + "`v_s_ = critic(batch.obs_next)`,\n", + "\n", + "where both `v_s` and `v_s_` are 10 dimensional arrays and `critic` is usually a neural network.\n", + "\n", + "After we've got all those values, GAE can be computed following the equation below." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ooHNIICGUO19" + }, + "source": [ + "\\begin{aligned}\n", + "\\hat{A}_{t}^{\\mathrm{GAE}(\\gamma, \\lambda)}: =& \\sum_{l=0}^{\\infty}(\\gamma \\lambda)^{l} \\delta_{t+l}^{V}\n", + "\\end{aligned}\n", + "\n", + "where\n", + "\n", + "\\begin{equation}\n", + "\\delta_{t}^{V} \\quad=-V\\left(s_{t}\\right)+r_{t}+\\gamma V\\left(s_{t+1}\\right)\n", + "\\end{equation}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eV6XZaouU7EV" + }, + "source": [ + "Unfortunately, if you follow this equation, which is taken from the paper, you probably will get a slightly lower performance than you expected. There are at least 3 \"bugs\" in this equation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FCxD9gNNVYbd" + }, + "source": [ + "**First** is that Gym always returns you a `obs_next` even if this is already the last step. The value of this timestep is exactly 0 and you should not let the neural network estimate it." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rNZNUNgQVvRJ", + "outputId": "44354595-c25a-4da8-b4d8-cffa31ac4b7d" + }, + "source": [ + "```python\n", + "# Assume v_s_ is got by calling critic(batch.obs_next)\n", + "v_s_ = np.ones(10)\n", + "v_s_ *= ~batch.done\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2EtMi18QWXTN" + }, + "source": [ + "After the fix above, we will perhaps get a more accurate estimate.\n", + "\n", + "**Secondly**, you must know when to stop bootstrapping. Usually we stop bootstrapping when we meet a `done` flag. However, in the buffer above, the last (10th) step is not marked by done=True, because the collecting has not finished. We must know all those unfinished steps so that we know when to stop bootstrapping.\n", + "\n", + "Luckily, this can be done under the assistance of buffer because buffers in Tianshou not only store data, but also help you manage data trajectories." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "saluvX4JU6bC", + "outputId": "2994d178-2f33-40a0-a6e4-067916b0b5c5" + }, + "source": [ + "```python\n", + "unfinished_indexes = dummy_buffer.unfinished_index()\n", + "done_indexes = np.where(batch.done)[0]\n", + "stop_bootstrap_ids = np.concatenate([unfinished_indexes, done_indexes])\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qp6vVE4dYWv1" + }, + "source": [ + "**Thirdly**, there are some special indexes which are marked by done flag, however its value for obs_next should not be zero. It is again because done does not differentiate between terminated and truncated. These steps are usually those at the last step of an episode, but this episode stops not because the agent can no longer get any rewards (value=0), but because the episode is too long so we have to truncate it. These kind of steps are always marked with `info['TimeLimit.truncated']=True` in Gym." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tWkqXRJfZTvV" + }, + "source": [ + "As a result, we need to rewrite the equation above\n", + "\n", + "`v_s_ *= ~batch.done`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kms-QtxKZe-M" + }, + "source": [ + "to\n", + "\n", + "```\n", + "mask = batch.info['TimeLimit.truncated'] | (~batch.done)\n", + "v_s_ *= mask\n", + "\n", + "```\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u_aPPoKraBu6" + }, + "source": [ + "## Summary\n", + "If you already felt bored by now, simply remember that Tianshou can help handle all these little details so that you can focus on the algorithm itself. Just call `Algorithm.compute_episodic_return()`.\n", + "\n", + "If you still feel interested, we would recommend you check Appendix C in this [paper](https://arxiv.org/abs/2107.14171v2) and implementation of `Algorithm.value_mask()` and `Algorithm.compute_episodic_return()` for details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2cPnUXRBWKD9" + }, + "source": [ + "
\n", + "\n", + "
\n", + "
\n", + "\n", + "
" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/02_notebooks/L5_Collector.ipynb b/docs/02_notebooks/L5_Collector.ipynb new file mode 100644 index 000000000..a52dd25eb --- /dev/null +++ b/docs/02_notebooks/L5_Collector.ipynb @@ -0,0 +1,271 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "M98bqxdMsTXK" + }, + "source": [ + "# Collector\n", + "From its literal meaning, we can easily know that the Collector in Tianshou is used to collect training data. More specifically, the Collector controls the interaction between Policy (agent) and the environment. It also helps save the interaction data into the ReplayBuffer and returns episode statistics.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OX5cayLv4Ziu" + }, + "source": [ + "## Usages\n", + "Collector can be used both for training (data collecting) and evaluation in Tianshou." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z6XKbj28u8Ze" + }, + "source": [ + "### Policy evaluation\n", + "We need to evaluate our trained policy from time to time in DRL experiments. Collector can help us with this.\n", + "\n", + "First we have to initialize a Collector with an (vectorized) environment and a given policy (agent)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "editable": true, + "id": "w8t9ubO7u69J", + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-cell", + "remove-output" + ] + }, + "outputs": [], + "source": [ + "import gymnasium as gym\n", + "import torch\n", + "\n", + "from tianshou.algorithm.modelfree.reinforce import ProbabilisticActorPolicy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from tianshou.data import Collector, CollectStats, VectorReplayBuffer\n", + "from tianshou.env import DummyVectorEnv\n", + "from tianshou.utils.net.common import Net\n", + "from tianshou.utils.net.discrete import DiscreteActor\n", + "\n", + "env = gym.make(\"CartPole-v1\")\n", + "test_envs = DummyVectorEnv([lambda: gym.make(\"CartPole-v1\") for _ in range(2)])\n", + "\n", + "# model\n", + "assert env.observation_space.shape is not None # for mypy\n", + "preprocess_net = Net(\n", + " state_shape=env.observation_space.shape,\n", + " hidden_sizes=[\n", + " 16,\n", + " ],\n", + ")\n", + "\n", + "assert isinstance(env.action_space, gym.spaces.Discrete) # for mypy\n", + "actor = DiscreteActor(preprocess_net=preprocess_net, action_shape=env.action_space.n)\n", + "\n", + "policy = ProbabilisticActorPolicy(\n", + " actor=actor,\n", + " dist_fn=torch.distributions.Categorical,\n", + " action_space=env.action_space,\n", + " action_scaling=False,\n", + ")\n", + "test_collector = Collector[CollectStats](policy, test_envs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wmt8vuwpzQdR" + }, + "source": [ + "Now we would like to collect 9 episodes of data to test how our initialized Policy performs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9SuT6MClyjyH", + "outputId": "1e48f13b-c1fe-4fc2-ca1b-669485efdcae" + }, + "outputs": [], + "source": [ + "collect_result = test_collector.collect(reset_before_collect=True, n_episode=9)\n", + "\n", + "collect_result.pprint_asdict()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zX9AQY0M0R3C" + }, + "source": [ + "Now we wonder what is the performance of a random policy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UEcs8P8P0RLt", + "outputId": "85f02f9d-b79b-48b2-99c6-36a1602f0884" + }, + "outputs": [], + "source": [ + "# Reset the collector\n", + "collect_result = test_collector.collect(reset_before_collect=True, n_episode=9, random=True)\n", + "\n", + "collect_result.pprint_asdict()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sKQRTiG10ljU" + }, + "source": [ + "It seems like an initialized policy performs even worse than a random policy without any training." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8RKmHIoG1A1k" + }, + "source": [ + "### Data Collecting\n", + "Data collecting is mostly used during training, when we need to store the collected data in a ReplayBuffer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "editable": true, + "id": "CB9XB9bF1YPC", + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "train_env_num = 4\n", + "buffer_size = 100\n", + "train_envs = DummyVectorEnv([lambda: gym.make(\"CartPole-v1\") for _ in range(train_env_num)])\n", + "replayBuffer = VectorReplayBuffer(buffer_size, train_env_num)\n", + "\n", + "train_collector = Collector[CollectStats](policy, train_envs, replayBuffer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWKDazA42IUQ" + }, + "source": [ + "Now we can collect 50 steps of data, which will be automatically saved in the replay buffer. You can still choose to collect a certain number of episodes rather than steps. Try it yourself." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-fUtQOnM2Yi1", + "outputId": "dceee987-433e-4b75-ed9e-823c20a9e1c2" + }, + "outputs": [], + "source": [ + "train_collector.reset()\n", + "replayBuffer.reset()\n", + "\n", + "print(f\"Replay buffer before collecting is empty, and has length={len(replayBuffer)} \\n\")\n", + "n_step = 50\n", + "collect_result = train_collector.collect(n_step=n_step)\n", + "print(\n", + " f\"Replay buffer after collecting {n_step} steps has length={len(replayBuffer)}.\\n\"\n", + " f\"This may exceed n_step when it is not a multiple of train_env_num because of vectorization.\\n\",\n", + ")\n", + "collect_result.pprint_asdict()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sample some data from the replay buffer." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "replayBuffer.sample(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8NP7lOBU3-VS" + }, + "source": [ + "## Further Reading\n", + "The above collector actually collects 52 data at a time because 52 % 4 = 0. There is one asynchronous collector which allows you collect exactly 50 steps. 
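For the synchronous collector, the rounding itself is easy to verify: all 4 training environments are stepped in lockstep, so `n_step=50` is rounded up to the next multiple of `train_env_num`. A quick sanity check, reusing the numbers from the cells above (only the arithmetic is shown here):

```python
import math

n_step, train_env_num = 50, 4
rounds = math.ceil(n_step / train_env_num)  # 13 synchronous steps across all envs
print(rounds * train_env_num)  # 52, matching the buffer length reported above
```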
Check the [documentation](https://tianshou.org/en/master/03_api/data/collector.html#tianshou.data.collector.AsyncCollector) for details." + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/04_contributing/04_contributing.rst b/docs/04_contributing/04_contributing.rst new file mode 100644 index 000000000..48cf172c8 --- /dev/null +++ b/docs/04_contributing/04_contributing.rst @@ -0,0 +1,148 @@ +Contributing to Tianshou +======================== + + +Install Development Environment +------------------------------- + +Tianshou is built and managed by `poetry `_. For example, +to install all relevant requirements (and install Tianshou itself in editable mode) +you can simply call + +.. code-block:: bash + + $ poetry install --with dev + + +Platform-Specific Configuration +------------------------------- + +**Windows**: +Since the repository contains symbolic links, make sure this is supported: + + * Enable Windows Developer Mode to allow symbolic links to be created: Search Start Menu for "Developer Settings" and enable "Developer Mode" + * Enable symbolic links for this repository: ``git config core.symlinks true`` + * Re-checkout the current git state: ``git checkout .`` + + +PEP8 Code Style Check and Formatting +---------------------------------------- + +Please set up pre-commit by running + +.. code-block:: bash + + $ pre-commit install + +in the main directory. This should make sure that your contribution is properly +formatted before every commit. + +The code is inspected and formatted by ``black`` and ``ruff``. They are executed as +pre-commit hooks. In addition, ``poe the poet`` tasks are configured. +Simply run ``poe`` to see the available tasks. +E.g, to format and check the linting manually you can run: + +.. code-block:: bash + + $ poe format + $ poe lint + + +Type Checks +----------- + +We use `mypy `_ to check the type annotations. To check, in the main directory, run: + +.. code-block:: bash + + $ poe type-check + + +Testing Locally +--------------- + +This command will run automatic tests in the main directory + +.. code-block:: bash + + $ poe test + + +Determinism Tests +~~~~~~~~~~~~~~~~~ + +We implemented "determinism tests" for Tianshou's algorithms, which allow us to determine +whether algorithms still compute exactly the same results even after large refactorings. +These tests are applied by + + 1. creating a behavior snapshot ine the old code branch before the changes and then + 2. running the test in the new branch to ensure that the behavior is the same. + +Unfortunately, full determinism is difficult to achieve across different platforms and even different +machines using the same platform an Python environment. +Therefore, these tests are not carried out in the CI pipeline. +Instead, it is up to the developer to run them locally and check the results whenever a change +is made to the code base that could affect algorithm behavior. 
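For example, assuming the determinism tests follow the ``test_*_determinism`` naming convention described below, one way to run just this subset locally (instead of the full ``poe test`` run) is pytest's keyword filter; this is only a sketch, so adjust the selection to the actual test layout:

.. code-block:: bash

    $ # run only the determinism tests
    $ poetry run pytest -k determinism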
+ +Technically, the two steps are handled by setting static flags in class ``AlgorithmDeterminismTest`` and then +running either the full test suite or a specific determinism test (``test_*_determinism``, e.g. ``test_ddpg_determinism``) +in the two branches to be compared. + + 1. On the old branch: (Temporarily) set ``ENABLED=True`` and ``FORCE_SNAPSHOT_UPDATE=True`` and run the test(s). + 2. On the new branch: (Temporarily) set ``ENABLED=True`` and ``FORCE_SNAPSHOT_UPDATE=False`` and run the test(s). + 3. Inspect the test results; find a summary in ``determinism_tests.log`` + +Test by GitHub Actions +---------------------- + +1. Click the ``Actions`` button in your own repo: + +.. image:: ../_static/images/action1.jpg + :align: center + +2. Click the green button: + +.. image:: ../_static/images/action2.jpg + :align: center + +3. You will see ``Actions Enabled.`` on the top of html page. + +4. When you push a new commit to your own repo (e.g. ``git push``), it will automatically run the test in this page: + +.. image:: ../_static/images/action3.png + :align: center + + +Documentation +------------- + +Documentations are written under the ``docs/`` directory as ReStructuredText (``.rst``) files. ``index.rst`` is the main page. A Tutorial on ReStructuredText can be found `here `_. + +API References are automatically generated by `Sphinx `_ according to the outlines under ``docs/api/`` and should be modified when any code changes. + +To compile documentation into webpage, run + +.. code-block:: bash + + $ poe doc-build + +The generated webpage is in ``docs/_build`` and can be viewed with browser (http://0.0.0.0:8000/). + + +Documentation Generation Test +----------------------------- + +We have the following three documentation tests: + +1. pydocstyle (as part of ruff): test all docstring under ``tianshou/``; + +2. doc8 (as part of ruff): test ReStructuredText format; + +3. sphinx spelling and test: test if there is any error/warning when generating front-end html documentation. + +To check, in the main directory, run: + +.. code-block:: bash + + $ poe lint + $ poe doc-build diff --git a/docs/04_contributing/05_contributors.rst b/docs/04_contributing/05_contributors.rst new file mode 100644 index 000000000..715c24ab3 --- /dev/null +++ b/docs/04_contributing/05_contributors.rst @@ -0,0 +1,28 @@ +Contributors +============ + +We always welcome contributions to help make Tianshou better! +Tianshou was originally created by the `THU-ML Group `_ at Tsinghua University. + +Today, it is backed by the `appliedAI Institute for Europe `_, +which is committed to making Tianshou the go-to resource for reinforcement learning research and development, +and guaranteeing its long-term maintenance and support. + +The original creator Jiayi Weng (`Trinkle23897 `_) continues +to be a key contributor to the project. + +The current tianshou maintainers from the appliedAI Institute for Europe are: + +* Michael Panchenko (`MischaPanch `_) +* Dominik Jain (`opcode81 `_) + + +An incomplete list of the early contributors is: + +* Alexis Duburcq (`duburcqa `_) +* Kaichao You (`youkaichao `_) +* Huayu Chen (`ChenDRAG `_) +* Yi Su (`nuance1979 `_) + + +You can find more information about contributors `here `_. diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d4bb2cbb9..000000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. 
-SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = . -BUILDDIR = _build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 000000000..fce609211 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1,154 @@ +# Book settings +# Learn more at https://jupyterbook.org/customize/config.html + +####################################################################################### +# A default configuration that will be loaded for all jupyter books +# Users are expected to override these values in their own `_config.yml` file. +# This is also the "master list" of all allowed keys and values. + +####################################################################################### +# Book settings +title : Tianshou Documentation # The title of the book. Will be placed in the left navbar. +author : Tianshou contributors # The author of the book +copyright : "2020, Tianshou contributors." # Copyright year to be placed in the footer +logo : _static/images/tianshou-logo.png # A path to the book logo +# Patterns to skip when building the book. Can be glob-style (e.g. "*skip.ipynb") +exclude_patterns : ['**.ipynb_checkpoints', '.DS_Store', 'Thumbs.db', '_build', 'jupyter_execute', '.jupyter_cache', '.pytest_cache', 'docs/autogen_rst.py', 'docs/create_toc.py'] +# Auto-exclude files not in the toc +only_build_toc_files : false + +####################################################################################### +# Execution settings +execute: + execute_notebooks : cache # Whether to execute notebooks at build time. Must be one of ("auto", "force", "cache", "off") + cache : "" # A path to the jupyter cache that will be used to store execution artifacts. Defaults to `_build/.jupyter_cache/` + exclude_patterns : [] # A list of patterns to *skip* in execution (e.g. a notebook that takes a really long time) + timeout : -1 # The maximum time (in seconds) each notebook cell is allowed to run. + run_in_temp : false # If `True`, then a temporary directory will be created and used as the command working directory (cwd), + # otherwise the notebook's parent directory will be the cwd. + allow_errors : false # If `False`, when a code cell raises an error the execution is stopped, otherwise all cells are always run. + stderr_output : show # One of 'show', 'remove', 'remove-warn', 'warn', 'error', 'severe' + +####################################################################################### +# Parse and render settings +parse: + myst_enable_extensions: # default extensions to enable in the myst parser. 
See https://myst-parser.readthedocs.io/en/latest/using/syntax-optional.html + - amsmath + - colon_fence + # - deflist + - dollarmath + # - html_admonition + # - html_image + - linkify + # - replacements + # - smartquotes + - substitution + - tasklist + myst_url_schemes: [ mailto, http, https ] # URI schemes that will be recognised as external URLs in Markdown links + myst_dmath_double_inline: true # Allow display math ($$) within an inline context + +####################################################################################### +# HTML-specific settings +html: + favicon : "_static/images/tianshou-favicon.png" # A path to a favicon image + use_edit_page_button : false # Whether to add an "edit this page" button to pages. If `true`, repository information in repository: must be filled in + use_repository_button : false # Whether to add a link to your repository button + use_issues_button : false # Whether to add an "open an issue" button + use_multitoc_numbering : true # Continuous numbering across parts/chapters + extra_footer : "" + google_analytics_id : "" # A GA id that can be used to track book views. + home_page_in_navbar : true # Whether to include your home page in the left Navigation Bar + baseurl : "https://tianshou.readthedocs.io/en/master/" + analytics: + + comments: + hypothesis : false + utterances : false + announcement : "" # A banner announcement at the top of the site. + +####################################################################################### +# LaTeX-specific settings +latex: + latex_engine : pdflatex # one of 'pdflatex', 'xelatex' (recommended for unicode), 'luatex', 'platex', 'uplatex' + use_jupyterbook_latex : true # use sphinx-jupyterbook-latex for pdf builds as default + targetname : book.tex +# Add a bibtex file so that we can create citations +bibtex_bibfiles: + - refs.bib + +####################################################################################### +# Launch button settings +launch_buttons: + notebook_interface : classic # The interface interactive links will activate ["classic", "jupyterlab"] + binderhub_url : "" # The URL of the BinderHub (e.g., https://mybinder.org) + jupyterhub_url : "" # The URL of the JupyterHub (e.g., https://datahub.berkeley.edu) + thebe : false # Add a thebe button to pages (requires the repository to run on Binder) + colab_url : "https://colab.research.google.com" + +repository: + url : https://github.com/thu-ml/tianshou # The URL to your book's repository + path_to_book : docs # A path to your book's folder, relative to the repository root. + branch : master # Which branch of the repository should be used when creating links + +####################################################################################### +# Advanced and power-user settings +sphinx: + extra_extensions : + - sphinx.ext.autodoc + - sphinx.ext.viewcode + - sphinx_toolbox.more_autodoc.sourcelink + - sphinxcontrib.spelling + local_extensions : # A list of local extensions to load by sphinx specified by "name: path" items + recursive_update : false # A boolean indicating whether to overwrite the Sphinx config (true) or recursively update (false) + config : # key-value pairs to directly over-ride the Sphinx configuration + autodoc_typehints_format: "short" + autodoc_member_order: "bysource" + autodoc_mock_imports: + # mock imports for optional dependencies (e.g. 
dependencies of atari/atari_wrapper) + - cv2 + autoclass_content: "both" + autodoc_default_options: + show-inheritance: True + html_js_files: +# We have to list them explicitly because they need to be loaded in a specific order + - js/vega@5.js + - js/vega-lite@5.js + - js/vega-embed@5.js + autodoc_show_sourcelink: True + add_module_names: False + github_username: thu-ml + github_repository: tianshou + python_use_unqualified_type_names: True + nb_mime_priority_overrides: [ + [ 'html', 'application/vnd.jupyter.widget-view+json', 10 ], + [ 'html', 'application/javascript', 20 ], + [ 'html', 'text/html', 30 ], + [ 'html', 'text/latex', 40 ], + [ 'html', 'image/svg+xml', 50 ], + [ 'html', 'image/png', 60 ], + [ 'html', 'image/jpeg', 70 ], + [ 'html', 'text/markdown', 80 ], + [ 'html', 'text/plain', 90 ], + [ 'spelling', 'application/vnd.jupyter.widget-view+json', 10 ], + [ 'spelling', 'application/javascript', 20 ], + [ 'spelling', 'text/html', 30 ], + [ 'spelling', 'text/latex', 40 ], + [ 'spelling', 'image/svg+xml', 50 ], + [ 'spelling', 'image/png', 60 ], + [ 'spelling', 'image/jpeg', 70 ], + [ 'spelling', 'text/markdown', 80 ], + [ 'spelling', 'text/plain', 90 ], + ] + mathjax_path: https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js + mathjax3_config: + loader: { load: [ '[tex]/configmacros' ] } + tex: + packages: { '[+]': [ 'configmacros' ] } + macros: + vect: ["{\\mathbf{\\boldsymbol{#1}} }", 1] + E: "{\\mathbb{E}}" + P: "{\\mathbb{P}}" + R: "{\\mathbb{R}}" + abs: ["{\\left| #1 \\right|}", 1] + simpl: ["{\\Delta^{#1} }", 1] + amax: "{\\text{argmax}}" \ No newline at end of file diff --git a/docs/_static/css/style.css b/docs/_static/css/style.css index b9f323f22..7a964ee10 100644 --- a/docs/_static/css/style.css +++ b/docs/_static/css/style.css @@ -50,6 +50,13 @@ h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.capti display: none; } +@media (min-width: 960px) { + .bd-page-width { + max-width: none !important; + } +} + + @media screen and (max-width: 768px) { .wy-side-nav-search>a img.logo { height: 60px; diff --git a/docs/_static/images/action1.jpg b/docs/_static/images/action1.jpg index 59790a36f..49620d512 100644 Binary files a/docs/_static/images/action1.jpg and b/docs/_static/images/action1.jpg differ diff --git a/docs/_static/images/action2.jpg b/docs/_static/images/action2.jpg index 4f3ade3e0..e07c33f52 100644 Binary files a/docs/_static/images/action2.jpg and b/docs/_static/images/action2.jpg differ diff --git a/docs/_static/images/action3.png b/docs/_static/images/action3.png index ef510cf16..8da8da442 100644 Binary files a/docs/_static/images/action3.png and b/docs/_static/images/action3.png differ diff --git a/docs/_static/images/aggregation.png b/docs/_static/images/aggregation.png index dbcd9b33d..a3061cd91 100644 Binary files a/docs/_static/images/aggregation.png and b/docs/_static/images/aggregation.png differ diff --git a/docs/_static/images/async.png b/docs/_static/images/async.png index 11547dedc..b91cbafb4 100644 Binary files a/docs/_static/images/async.png and b/docs/_static/images/async.png differ diff --git a/docs/_static/images/batch_reserve.png b/docs/_static/images/batch_reserve.png index 49c8623c7..3731e9327 100644 Binary files a/docs/_static/images/batch_reserve.png and b/docs/_static/images/batch_reserve.png differ diff --git a/docs/_static/images/batch_tree.png b/docs/_static/images/batch_tree.png index b914c2ce1..e366eee4f 100644 Binary files a/docs/_static/images/batch_tree.png and b/docs/_static/images/batch_tree.png 
differ diff --git a/docs/_static/images/concepts_arch.png b/docs/_static/images/concepts_arch.png index baaa7f8fc..e256af19d 100644 Binary files a/docs/_static/images/concepts_arch.png and b/docs/_static/images/concepts_arch.png differ diff --git a/docs/_static/images/concepts_arch2.png b/docs/_static/images/concepts_arch2.png index 58d8c62e7..3489d0dea 100644 Binary files a/docs/_static/images/concepts_arch2.png and b/docs/_static/images/concepts_arch2.png differ diff --git a/docs/_static/images/discrete_dqn_hl.gif b/docs/_static/images/discrete_dqn_hl.gif new file mode 100644 index 000000000..e958d5a07 Binary files /dev/null and b/docs/_static/images/discrete_dqn_hl.gif differ diff --git a/docs/_static/images/marl.png b/docs/_static/images/marl.png index cf368d5ef..6dfda1681 100644 Binary files a/docs/_static/images/marl.png and b/docs/_static/images/marl.png differ diff --git a/docs/_static/images/pipeline.png b/docs/_static/images/pipeline.png index 5cbfaa386..960adbded 100644 Binary files a/docs/_static/images/pipeline.png and b/docs/_static/images/pipeline.png differ diff --git a/docs/_static/images/policy_table.svg b/docs/_static/images/policy_table.svg new file mode 100644 index 000000000..7abf43b5e Binary files /dev/null and b/docs/_static/images/policy_table.svg differ diff --git a/docs/_static/images/pseudocode_off_policy.svg b/docs/_static/images/pseudocode_off_policy.svg new file mode 100644 index 000000000..c16037a42 Binary files /dev/null and b/docs/_static/images/pseudocode_off_policy.svg differ diff --git a/docs/_static/images/structure.svg b/docs/_static/images/structure.svg new file mode 100644 index 000000000..2119ec176 --- /dev/null +++ b/docs/_static/images/structure.svg @@ -0,0 +1,3 @@ + + +
[structure.svg text layer: an architecture diagram connecting Agent, VecEnv (Env 1 … Env n), VecBuffer (Buf 1 … Buf n), Collector, Policy (PyTorch Module), Trainer and Logger; data flows as batches, observations, actions and statistics through env.step(), collector.collect(), buffer.add(), buffer.sample(), policy.forward(), policy.process_fn(), policy.learn() and policy.update(), with the trainer splitting and concatenating data.]
\ No newline at end of file diff --git a/docs/_static/images/tianshou-favicon.png b/docs/_static/images/tianshou-favicon.png new file mode 100644 index 000000000..3fafa0a23 Binary files /dev/null and b/docs/_static/images/tianshou-favicon.png differ diff --git a/docs/_static/images/tianshou-logo.png b/docs/_static/images/tianshou-logo.png index fa31829cc..ebb93acd0 100644 Binary files a/docs/_static/images/tianshou-logo.png and b/docs/_static/images/tianshou-logo.png differ diff --git a/docs/_static/images/tic-tac-toe.png b/docs/_static/images/tic-tac-toe.png index 071fa9f5c..4792c090d 100644 Binary files a/docs/_static/images/tic-tac-toe.png and b/docs/_static/images/tic-tac-toe.png differ diff --git a/docs/_static/images/timelimit.svg b/docs/_static/images/timelimit.svg new file mode 100644 index 000000000..b35ddd6f6 --- /dev/null +++ b/docs/_static/images/timelimit.svg @@ -0,0 +1,3 @@ + + +
[timelimit.svg text layer: a diagram of data segments b_{1}, b_{h}, b_{h+1} … b_{nh} collected from Env 1, Env 2 … Env n, each ending normally, because of the environment's time limit, or because enough timesteps were collected; buffer.sample(0) links all data segments sequentially into one data batch.]
\ No newline at end of file diff --git a/docs/_static/js/atari b/docs/_static/js/atari deleted file mode 120000 index 6423b4d07..000000000 --- a/docs/_static/js/atari +++ /dev/null @@ -1 +0,0 @@ -../../../examples/atari \ No newline at end of file diff --git a/docs/_static/js/atari/benchmark b/docs/_static/js/atari/benchmark new file mode 120000 index 000000000..000f92bb8 --- /dev/null +++ b/docs/_static/js/atari/benchmark @@ -0,0 +1 @@ +../../../../examples/atari/benchmark \ No newline at end of file diff --git a/docs/_static/js/benchmark.js b/docs/_static/js/benchmark.js index c0663959c..da44b43a9 100644 --- a/docs/_static/js/benchmark.js +++ b/docs/_static/js/benchmark.js @@ -20,13 +20,12 @@ var atari_envs = [ "SpaceInvadersNoFrameskip-v4", ]; -function showMujocoEnv(elem) { - var selectEnv = elem.value || mujoco_envs[0]; - var dataSource = { +function getDataSource(selectEnv, dirName) { + return { + // Paths are relative to the only file using this script, which is docs/01_tutorials/06_benchmark.rst $schema: "https://vega.github.io/schema/vega-lite/v5.json", data: { - // url: "/_static/js/mujoco/benchmark/" + selectEnv + "/result.json" - url: "/en/master/_static/js/mujoco/benchmark/" + selectEnv + "/result.json" + url: "../_static/js/" + dirName + "/benchmark/" + selectEnv + "/result.json" }, mark: "line", height: 400, @@ -68,69 +67,31 @@ function showMujocoEnv(elem) { "mark": "line" }] }; +} + +function showMujocoResults(elem) { + const selectEnv = elem.value || mujoco_envs[0]; + const dataSource = getDataSource(selectEnv, "mujoco"); vegaEmbed("#vis-mujoco", dataSource); } -function showAtariEnv(elem) { - var selectEnv = elem.value || atari_envs[0]; - var dataSource = { - $schema: "https://vega.github.io/schema/vega-lite/v5.json", - data: { - // url: "/_static/js/atari/benchmark/" + selectEnv + "/result.json" - url: "/en/master/_static/js/atari/benchmark/" + selectEnv + "/result.json" - }, - mark: "line", - height: 400, - width: 800, - params: [{name: "Range", value: 10000000, bind: {input: "range", min: 10000, max: 10000000}}], - transform: [ - {calculate: "datum.rew - datum.rew_std", as: "rew_std0"}, - {calculate: "datum.rew + datum.rew_std", as: "rew_std1"}, - {calculate: "datum.rew + ' ± ' + datum.rew_std", as: "tooltip_str"}, - {filter: "datum.env_step <= Range"}, - ], - encoding: { - color: {"field": "Agent", "type": "nominal"}, - x: {field: "env_step", type: "quantitative", title: "Env step"}, - }, - layer: [{ - "encoding": { - "opacity": {"value": 0.3}, - "y": { - "title": "Return", - "field": "rew_std0", - "type": "quantitative", - }, - "y2": {"field": "rew_std1"}, - tooltip: [ - {field: "env_step", type: "quantitative", title: "Env step"}, - {field: "Agent", type: "nominal"}, - {field: "tooltip_str", type: "nominal", title: "Return"}, - ] - }, - "mark": "area" - }, { - "encoding": { - "y": { - "field": "rew", - "type": "quantitative" - } - }, - "mark": "line" - }] - }; +function showAtariResults(elem) { + const selectEnv = elem.value || atari_envs[0]; + const dataSource = getDataSource(selectEnv, "atari"); vegaEmbed("#vis-atari", dataSource); } -$(document).ready(function() { + + +document.addEventListener('DOMContentLoaded', function() { var envMujocoSelect = $("#env-mujoco"); if (envMujocoSelect.length) { $.each(mujoco_envs, function(idx, env) {envMujocoSelect.append($("").val(env).html(env));}) - showMujocoEnv(envMujocoSelect); + showMujocoResults(envMujocoSelect); } var envAtariSelect = $("#env-atari"); if (envAtariSelect.length) { $.each(atari_envs, function(idx, env) 
{envAtariSelect.append($("").val(env).html(env));}) - showAtariEnv(envAtariSelect); + showAtariResults(envAtariSelect); } }); diff --git a/docs/_static/js/copybutton.js b/docs/_static/js/copybutton.js index f5960d268..35ccc04f9 100644 --- a/docs/_static/js/copybutton.js +++ b/docs/_static/js/copybutton.js @@ -1,4 +1,4 @@ -$(document).ready(function() { +document.addEventListener('DOMContentLoaded', function() { /* Add a [>>>] button on the top-right corner of code samples to hide * the >>> and ... prompts and the output and thus make the code * copyable. */ diff --git a/docs/_static/js/jquery-1.12.4.min.js b/docs/_static/js/jquery-1.12.4.min.js new file mode 100644 index 000000000..e83647587 --- /dev/null +++ b/docs/_static/js/jquery-1.12.4.min.js @@ -0,0 +1,5 @@ +/*! jQuery v1.12.4 | (c) jQuery Foundation | jquery.org/license */ +!function(a,b){"object"==typeof module&&"object"==typeof module.exports?module.exports=a.document?b(a,!0):function(a){if(!a.document)throw new Error("jQuery requires a window with a document");return b(a)}:b(a)}("undefined"!=typeof window?window:this,function(a,b){var c=[],d=a.document,e=c.slice,f=c.concat,g=c.push,h=c.indexOf,i={},j=i.toString,k=i.hasOwnProperty,l={},m="1.12.4",n=function(a,b){return new n.fn.init(a,b)},o=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g,p=/^-ms-/,q=/-([\da-z])/gi,r=function(a,b){return b.toUpperCase()};n.fn=n.prototype={jquery:m,constructor:n,selector:"",length:0,toArray:function(){return e.call(this)},get:function(a){return null!=a?0>a?this[a+this.length]:this[a]:e.call(this)},pushStack:function(a){var b=n.merge(this.constructor(),a);return b.prevObject=this,b.context=this.context,b},each:function(a){return n.each(this,a)},map:function(a){return this.pushStack(n.map(this,function(b,c){return a.call(b,c,b)}))},slice:function(){return this.pushStack(e.apply(this,arguments))},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},eq:function(a){var b=this.length,c=+a+(0>a?b:0);return this.pushStack(c>=0&&b>c?[this[c]]:[])},end:function(){return this.prevObject||this.constructor()},push:g,sort:c.sort,splice:c.splice},n.extend=n.fn.extend=function(){var a,b,c,d,e,f,g=arguments[0]||{},h=1,i=arguments.length,j=!1;for("boolean"==typeof g&&(j=g,g=arguments[h]||{},h++),"object"==typeof g||n.isFunction(g)||(g={}),h===i&&(g=this,h--);i>h;h++)if(null!=(e=arguments[h]))for(d in e)a=g[d],c=e[d],g!==c&&(j&&c&&(n.isPlainObject(c)||(b=n.isArray(c)))?(b?(b=!1,f=a&&n.isArray(a)?a:[]):f=a&&n.isPlainObject(a)?a:{},g[d]=n.extend(j,f,c)):void 0!==c&&(g[d]=c));return g},n.extend({expando:"jQuery"+(m+Math.random()).replace(/\D/g,""),isReady:!0,error:function(a){throw new Error(a)},noop:function(){},isFunction:function(a){return"function"===n.type(a)},isArray:Array.isArray||function(a){return"array"===n.type(a)},isWindow:function(a){return null!=a&&a==a.window},isNumeric:function(a){var b=a&&a.toString();return!n.isArray(a)&&b-parseFloat(b)+1>=0},isEmptyObject:function(a){var b;for(b in a)return!1;return!0},isPlainObject:function(a){var b;if(!a||"object"!==n.type(a)||a.nodeType||n.isWindow(a))return!1;try{if(a.constructor&&!k.call(a,"constructor")&&!k.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}if(!l.ownFirst)for(b in a)return k.call(a,b);for(b in a);return void 0===b||k.call(a,b)},type:function(a){return null==a?a+"":"object"==typeof a||"function"==typeof a?i[j.call(a)]||"object":typeof a},globalEval:function(b){b&&n.trim(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return 
a.replace(p,"ms-").replace(q,r)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toLowerCase()===b.toLowerCase()},each:function(a,b){var c,d=0;if(s(a)){for(c=a.length;c>d;d++)if(b.call(a[d],d,a[d])===!1)break}else for(d in a)if(b.call(a[d],d,a[d])===!1)break;return a},trim:function(a){return null==a?"":(a+"").replace(o,"")},makeArray:function(a,b){var c=b||[];return null!=a&&(s(Object(a))?n.merge(c,"string"==typeof a?[a]:a):g.call(c,a)),c},inArray:function(a,b,c){var d;if(b){if(h)return h.call(b,a,c);for(d=b.length,c=c?0>c?Math.max(0,d+c):c:0;d>c;c++)if(c in b&&b[c]===a)return c}return-1},merge:function(a,b){var c=+b.length,d=0,e=a.length;while(c>d)a[e++]=b[d++];if(c!==c)while(void 0!==b[d])a[e++]=b[d++];return a.length=e,a},grep:function(a,b,c){for(var d,e=[],f=0,g=a.length,h=!c;g>f;f++)d=!b(a[f],f),d!==h&&e.push(a[f]);return e},map:function(a,b,c){var d,e,g=0,h=[];if(s(a))for(d=a.length;d>g;g++)e=b(a[g],g,c),null!=e&&h.push(e);else for(g in a)e=b(a[g],g,c),null!=e&&h.push(e);return f.apply([],h)},guid:1,proxy:function(a,b){var c,d,f;return"string"==typeof b&&(f=a[b],b=a,a=f),n.isFunction(a)?(c=e.call(arguments,2),d=function(){return a.apply(b||this,c.concat(e.call(arguments)))},d.guid=a.guid=a.guid||n.guid++,d):void 0},now:function(){return+new Date},support:l}),"function"==typeof Symbol&&(n.fn[Symbol.iterator]=c[Symbol.iterator]),n.each("Boolean Number String Function Array Date RegExp Object Error Symbol".split(" "),function(a,b){i["[object "+b+"]"]=b.toLowerCase()});function s(a){var b=!!a&&"length"in a&&a.length,c=n.type(a);return"function"===c||n.isWindow(a)?!1:"array"===c||0===b||"number"==typeof b&&b>0&&b-1 in a}var t=function(a){var b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u="sizzle"+1*new Date,v=a.document,w=0,x=0,y=ga(),z=ga(),A=ga(),B=function(a,b){return a===b&&(l=!0),0},C=1<<31,D={}.hasOwnProperty,E=[],F=E.pop,G=E.push,H=E.push,I=E.slice,J=function(a,b){for(var c=0,d=a.length;d>c;c++)if(a[c]===b)return c;return-1},K="checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped",L="[\\x20\\t\\r\\n\\f]",M="(?:\\\\.|[\\w-]|[^\\x00-\\xa0])+",N="\\["+L+"*("+M+")(?:"+L+"*([*^$|!~]?=)"+L+"*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|("+M+"))|)"+L+"*\\]",O=":("+M+")(?:\\((('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|((?:\\\\.|[^\\\\()[\\]]|"+N+")*)|.*)\\)|)",P=new RegExp(L+"+","g"),Q=new RegExp("^"+L+"+|((?:^|[^\\\\])(?:\\\\.)*)"+L+"+$","g"),R=new RegExp("^"+L+"*,"+L+"*"),S=new RegExp("^"+L+"*([>+~]|"+L+")"+L+"*"),T=new RegExp("="+L+"*([^\\]'\"]*?)"+L+"*\\]","g"),U=new RegExp(O),V=new RegExp("^"+M+"$"),W={ID:new RegExp("^#("+M+")"),CLASS:new RegExp("^\\.("+M+")"),TAG:new RegExp("^("+M+"|[*])"),ATTR:new RegExp("^"+N),PSEUDO:new RegExp("^"+O),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+L+"*(even|odd|(([+-]|)(\\d*)n|)"+L+"*(?:([+-]|)"+L+"*(\\d+)|))"+L+"*\\)|)","i"),bool:new RegExp("^(?:"+K+")$","i"),needsContext:new RegExp("^"+L+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+L+"*((?:-\\d)?\\d*)"+L+"*\\)|)(?=[^-]|$)","i")},X=/^(?:input|select|textarea|button)$/i,Y=/^h\d$/i,Z=/^[^{]+\{\s*\[native \w/,$=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,_=/[+~]/,aa=/'|\\/g,ba=new RegExp("\\\\([\\da-f]{1,6}"+L+"?|("+L+")|.)","ig"),ca=function(a,b,c){var d="0x"+b-65536;return 
d!==d||c?b:0>d?String.fromCharCode(d+65536):String.fromCharCode(d>>10|55296,1023&d|56320)},da=function(){m()};try{H.apply(E=I.call(v.childNodes),v.childNodes),E[v.childNodes.length].nodeType}catch(ea){H={apply:E.length?function(a,b){G.apply(a,I.call(b))}:function(a,b){var c=a.length,d=0;while(a[c++]=b[d++]);a.length=c-1}}}function fa(a,b,d,e){var f,h,j,k,l,o,r,s,w=b&&b.ownerDocument,x=b?b.nodeType:9;if(d=d||[],"string"!=typeof a||!a||1!==x&&9!==x&&11!==x)return d;if(!e&&((b?b.ownerDocument||b:v)!==n&&m(b),b=b||n,p)){if(11!==x&&(o=$.exec(a)))if(f=o[1]){if(9===x){if(!(j=b.getElementById(f)))return d;if(j.id===f)return d.push(j),d}else if(w&&(j=w.getElementById(f))&&t(b,j)&&j.id===f)return d.push(j),d}else{if(o[2])return H.apply(d,b.getElementsByTagName(a)),d;if((f=o[3])&&c.getElementsByClassName&&b.getElementsByClassName)return H.apply(d,b.getElementsByClassName(f)),d}if(c.qsa&&!A[a+" "]&&(!q||!q.test(a))){if(1!==x)w=b,s=a;else if("object"!==b.nodeName.toLowerCase()){(k=b.getAttribute("id"))?k=k.replace(aa,"\\$&"):b.setAttribute("id",k=u),r=g(a),h=r.length,l=V.test(k)?"#"+k:"[id='"+k+"']";while(h--)r[h]=l+" "+qa(r[h]);s=r.join(","),w=_.test(a)&&oa(b.parentNode)||b}if(s)try{return H.apply(d,w.querySelectorAll(s)),d}catch(y){}finally{k===u&&b.removeAttribute("id")}}}return i(a.replace(Q,"$1"),b,d,e)}function ga(){var a=[];function b(c,e){return a.push(c+" ")>d.cacheLength&&delete b[a.shift()],b[c+" "]=e}return b}function ha(a){return a[u]=!0,a}function ia(a){var b=n.createElement("div");try{return!!a(b)}catch(c){return!1}finally{b.parentNode&&b.parentNode.removeChild(b),b=null}}function ja(a,b){var c=a.split("|"),e=c.length;while(e--)d.attrHandle[c[e]]=b}function ka(a,b){var c=b&&a,d=c&&1===a.nodeType&&1===b.nodeType&&(~b.sourceIndex||C)-(~a.sourceIndex||C);if(d)return d;if(c)while(c=c.nextSibling)if(c===b)return-1;return a?1:-1}function la(a){return function(b){var c=b.nodeName.toLowerCase();return"input"===c&&b.type===a}}function ma(a){return function(b){var c=b.nodeName.toLowerCase();return("input"===c||"button"===c)&&b.type===a}}function na(a){return ha(function(b){return b=+b,ha(function(c,d){var e,f=a([],c.length,b),g=f.length;while(g--)c[e=f[g]]&&(c[e]=!(d[e]=c[e]))})})}function oa(a){return a&&"undefined"!=typeof a.getElementsByTagName&&a}c=fa.support={},f=fa.isXML=function(a){var b=a&&(a.ownerDocument||a).documentElement;return b?"HTML"!==b.nodeName:!1},m=fa.setDocument=function(a){var b,e,g=a?a.ownerDocument||a:v;return g!==n&&9===g.nodeType&&g.documentElement?(n=g,o=n.documentElement,p=!f(n),(e=n.defaultView)&&e.top!==e&&(e.addEventListener?e.addEventListener("unload",da,!1):e.attachEvent&&e.attachEvent("onunload",da)),c.attributes=ia(function(a){return a.className="i",!a.getAttribute("className")}),c.getElementsByTagName=ia(function(a){return a.appendChild(n.createComment("")),!a.getElementsByTagName("*").length}),c.getElementsByClassName=Z.test(n.getElementsByClassName),c.getById=ia(function(a){return o.appendChild(a).id=u,!n.getElementsByName||!n.getElementsByName(u).length}),c.getById?(d.find.ID=function(a,b){if("undefined"!=typeof b.getElementById&&p){var c=b.getElementById(a);return c?[c]:[]}},d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){return a.getAttribute("id")===b}}):(delete d.find.ID,d.filter.ID=function(a){var b=a.replace(ba,ca);return function(a){var c="undefined"!=typeof a.getAttributeNode&&a.getAttributeNode("id");return c&&c.value===b}}),d.find.TAG=c.getElementsByTagName?function(a,b){return"undefined"!=typeof 
b.getElementsByTagName?b.getElementsByTagName(a):c.qsa?b.querySelectorAll(a):void 0}:function(a,b){var c,d=[],e=0,f=b.getElementsByTagName(a);if("*"===a){while(c=f[e++])1===c.nodeType&&d.push(c);return d}return f},d.find.CLASS=c.getElementsByClassName&&function(a,b){return"undefined"!=typeof b.getElementsByClassName&&p?b.getElementsByClassName(a):void 0},r=[],q=[],(c.qsa=Z.test(n.querySelectorAll))&&(ia(function(a){o.appendChild(a).innerHTML="",a.querySelectorAll("[msallowcapture^='']").length&&q.push("[*^$]="+L+"*(?:''|\"\")"),a.querySelectorAll("[selected]").length||q.push("\\["+L+"*(?:value|"+K+")"),a.querySelectorAll("[id~="+u+"-]").length||q.push("~="),a.querySelectorAll(":checked").length||q.push(":checked"),a.querySelectorAll("a#"+u+"+*").length||q.push(".#.+[+~]")}),ia(function(a){var b=n.createElement("input");b.setAttribute("type","hidden"),a.appendChild(b).setAttribute("name","D"),a.querySelectorAll("[name=d]").length&&q.push("name"+L+"*[*^$|!~]?="),a.querySelectorAll(":enabled").length||q.push(":enabled",":disabled"),a.querySelectorAll("*,:x"),q.push(",.*:")})),(c.matchesSelector=Z.test(s=o.matches||o.webkitMatchesSelector||o.mozMatchesSelector||o.oMatchesSelector||o.msMatchesSelector))&&ia(function(a){c.disconnectedMatch=s.call(a,"div"),s.call(a,"[s!='']:x"),r.push("!=",O)}),q=q.length&&new RegExp(q.join("|")),r=r.length&&new RegExp(r.join("|")),b=Z.test(o.compareDocumentPosition),t=b||Z.test(o.contains)?function(a,b){var c=9===a.nodeType?a.documentElement:a,d=b&&b.parentNode;return a===d||!(!d||1!==d.nodeType||!(c.contains?c.contains(d):a.compareDocumentPosition&&16&a.compareDocumentPosition(d)))}:function(a,b){if(b)while(b=b.parentNode)if(b===a)return!0;return!1},B=b?function(a,b){if(a===b)return l=!0,0;var d=!a.compareDocumentPosition-!b.compareDocumentPosition;return d?d:(d=(a.ownerDocument||a)===(b.ownerDocument||b)?a.compareDocumentPosition(b):1,1&d||!c.sortDetached&&b.compareDocumentPosition(a)===d?a===n||a.ownerDocument===v&&t(v,a)?-1:b===n||b.ownerDocument===v&&t(v,b)?1:k?J(k,a)-J(k,b):0:4&d?-1:1)}:function(a,b){if(a===b)return l=!0,0;var c,d=0,e=a.parentNode,f=b.parentNode,g=[a],h=[b];if(!e||!f)return a===n?-1:b===n?1:e?-1:f?1:k?J(k,a)-J(k,b):0;if(e===f)return ka(a,b);c=a;while(c=c.parentNode)g.unshift(c);c=b;while(c=c.parentNode)h.unshift(c);while(g[d]===h[d])d++;return d?ka(g[d],h[d]):g[d]===v?-1:h[d]===v?1:0},n):n},fa.matches=function(a,b){return fa(a,null,null,b)},fa.matchesSelector=function(a,b){if((a.ownerDocument||a)!==n&&m(a),b=b.replace(T,"='$1']"),c.matchesSelector&&p&&!A[b+" "]&&(!r||!r.test(b))&&(!q||!q.test(b)))try{var d=s.call(a,b);if(d||c.disconnectedMatch||a.document&&11!==a.document.nodeType)return d}catch(e){}return fa(b,n,null,[a]).length>0},fa.contains=function(a,b){return(a.ownerDocument||a)!==n&&m(a),t(a,b)},fa.attr=function(a,b){(a.ownerDocument||a)!==n&&m(a);var e=d.attrHandle[b.toLowerCase()],f=e&&D.call(d.attrHandle,b.toLowerCase())?e(a,b,!p):void 0;return void 0!==f?f:c.attributes||!p?a.getAttribute(b):(f=a.getAttributeNode(b))&&f.specified?f.value:null},fa.error=function(a){throw new Error("Syntax error, unrecognized expression: "+a)},fa.uniqueSort=function(a){var b,d=[],e=0,f=0;if(l=!c.detectDuplicates,k=!c.sortStable&&a.slice(0),a.sort(B),l){while(b=a[f++])b===a[f]&&(e=d.push(f));while(e--)a.splice(d[e],1)}return k=null,a},e=fa.getText=function(a){var b,c="",d=0,f=a.nodeType;if(f){if(1===f||9===f||11===f){if("string"==typeof a.textContent)return a.textContent;for(a=a.firstChild;a;a=a.nextSibling)c+=e(a)}else if(3===f||4===f)return 
a.nodeValue}else while(b=a[d++])c+=e(b);return c},d=fa.selectors={cacheLength:50,createPseudo:ha,match:W,attrHandle:{},find:{},relative:{">":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(a){return a[1]=a[1].replace(ba,ca),a[3]=(a[3]||a[4]||a[5]||"").replace(ba,ca),"~="===a[2]&&(a[3]=" "+a[3]+" "),a.slice(0,4)},CHILD:function(a){return a[1]=a[1].toLowerCase(),"nth"===a[1].slice(0,3)?(a[3]||fa.error(a[0]),a[4]=+(a[4]?a[5]+(a[6]||1):2*("even"===a[3]||"odd"===a[3])),a[5]=+(a[7]+a[8]||"odd"===a[3])):a[3]&&fa.error(a[0]),a},PSEUDO:function(a){var b,c=!a[6]&&a[2];return W.CHILD.test(a[0])?null:(a[3]?a[2]=a[4]||a[5]||"":c&&U.test(c)&&(b=g(c,!0))&&(b=c.indexOf(")",c.length-b)-c.length)&&(a[0]=a[0].slice(0,b),a[2]=c.slice(0,b)),a.slice(0,3))}},filter:{TAG:function(a){var b=a.replace(ba,ca).toLowerCase();return"*"===a?function(){return!0}:function(a){return a.nodeName&&a.nodeName.toLowerCase()===b}},CLASS:function(a){var b=y[a+" "];return b||(b=new RegExp("(^|"+L+")"+a+"("+L+"|$)"))&&y(a,function(a){return b.test("string"==typeof a.className&&a.className||"undefined"!=typeof a.getAttribute&&a.getAttribute("class")||"")})},ATTR:function(a,b,c){return function(d){var e=fa.attr(d,a);return null==e?"!="===b:b?(e+="","="===b?e===c:"!="===b?e!==c:"^="===b?c&&0===e.indexOf(c):"*="===b?c&&e.indexOf(c)>-1:"$="===b?c&&e.slice(-c.length)===c:"~="===b?(" "+e.replace(P," ")+" ").indexOf(c)>-1:"|="===b?e===c||e.slice(0,c.length+1)===c+"-":!1):!0}},CHILD:function(a,b,c,d,e){var f="nth"!==a.slice(0,3),g="last"!==a.slice(-4),h="of-type"===b;return 1===d&&0===e?function(a){return!!a.parentNode}:function(b,c,i){var j,k,l,m,n,o,p=f!==g?"nextSibling":"previousSibling",q=b.parentNode,r=h&&b.nodeName.toLowerCase(),s=!i&&!h,t=!1;if(q){if(f){while(p){m=b;while(m=m[p])if(h?m.nodeName.toLowerCase()===r:1===m.nodeType)return!1;o=p="only"===a&&!o&&"nextSibling"}return!0}if(o=[g?q.firstChild:q.lastChild],g&&s){m=q,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n&&j[2],m=n&&q.childNodes[n];while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if(1===m.nodeType&&++t&&m===b){k[a]=[w,n,t];break}}else if(s&&(m=b,l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),j=k[a]||[],n=j[0]===w&&j[1],t=n),t===!1)while(m=++n&&m&&m[p]||(t=n=0)||o.pop())if((h?m.nodeName.toLowerCase()===r:1===m.nodeType)&&++t&&(s&&(l=m[u]||(m[u]={}),k=l[m.uniqueID]||(l[m.uniqueID]={}),k[a]=[w,t]),m===b))break;return t-=e,t===d||t%d===0&&t/d>=0}}},PSEUDO:function(a,b){var c,e=d.pseudos[a]||d.setFilters[a.toLowerCase()]||fa.error("unsupported pseudo: "+a);return e[u]?e(b):e.length>1?(c=[a,a,"",b],d.setFilters.hasOwnProperty(a.toLowerCase())?ha(function(a,c){var d,f=e(a,b),g=f.length;while(g--)d=J(a,f[g]),a[d]=!(c[d]=f[g])}):function(a){return e(a,0,c)}):e}},pseudos:{not:ha(function(a){var b=[],c=[],d=h(a.replace(Q,"$1"));return d[u]?ha(function(a,b,c,e){var f,g=d(a,null,e,[]),h=a.length;while(h--)(f=g[h])&&(a[h]=!(b[h]=f))}):function(a,e,f){return b[0]=a,d(b,null,f,c),b[0]=null,!c.pop()}}),has:ha(function(a){return function(b){return fa(a,b).length>0}}),contains:ha(function(a){return a=a.replace(ba,ca),function(b){return(b.textContent||b.innerText||e(b)).indexOf(a)>-1}}),lang:ha(function(a){return V.test(a||"")||fa.error("unsupported lang: "+a),a=a.replace(ba,ca).toLowerCase(),function(b){var c;do if(c=p?b.lang:b.getAttribute("xml:lang")||b.getAttribute("lang"))return 
[vendored minified jQuery static asset (Sizzle selector engine, Callbacks/Deferred, ready/data/queue, event system, and DOM-manipulation modules); the minified source was mangled during extraction and is not reproduced here]