这是indexloc提供的服务,不要输入任何密码
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
name: "Continuous Integration"
run-name: Running tests on "${{ github.ref }}" by "${{ github.actor }}"
on:
push:
branches:
- 'main'
pull_request:
types:
- opened
- reopened
- synchronize
workflow_dispatch:

jobs:
# Run pre-commit hooks
pre-commit:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v5
- run: uv sync
- run: uv run pre-commit run --all-files
# Run tests
tests:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v4
- uses: astral-sh/setup-uv@v5
- run: uv sync
- run: uv run pytest
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,9 @@ cython_debug/
.abstra/

# Visual Studio Code
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# and can be added to the global gitignore or merged into this file. However, if you prefer,
# you could uncomment the following to ignore the entire vscode folder
# .vscode/

Expand Down
16 changes: 16 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- id: check-json
- id: check-ast
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.12.1
hooks:
- id: ruff-format
types_or: [ python, pyi ]
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10
Empty file added README.md
Empty file.
6 changes: 6 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def main() -> None:
    """Print the project greeting to standard output."""
    greeting = "Hello from scikit-sampling!"
    print(greeting)


# Only run the greeting when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
40 changes: 40 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[project]
name = "scikit-sampling"
license = "MIT"
readme = "README.md"
version = "0.1.0"
requires-python = ">=3.10"
description = "A set of python modules for dataset sampling"
# Project links live in the `urls` table; a bare `url` key is not defined by
# the `[project]` metadata specification and is rejected by strict validators.
urls = { Repository = "https://github.com/leomaurodesenv/scikit-sampling" }
authors = [
    {name = "Leonardo Moraes", email = "leomaurodesenv@users.noreply.github.com"}
]
classifiers = [
    "Intended Audience :: Science/Research",
    "Intended Audience :: Developers",
    "Programming Language :: Python",
    "Topic :: Scientific/Engineering",
    "Development Status :: 4 - Beta",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]

# Runtime dependencies (scipy provides the normal-distribution quantile function).
dependencies = [
    "scipy>=1.15.0,<2",
]

[dependency-groups]
dev = [
"commitizen>=4.8.3,<5",
"pre-commit>=4.2.0,<5",
"pytest>=8.4.1,<9",
]

[tool.pytest.ini_options]
pythonpath = [
"."
]
44 changes: 44 additions & 0 deletions sksampling/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import math
import scipy.stats as st


def _get_z_score(confidence_level: float) -> float:
"""
Retrieves the Z-score for a given confidence level.

Args:
confidence_level: The confidence level as a float (e.g., 0.95 for 95%).

Returns:
The Z-score for the given confidence level.
"""
return st.norm.ppf(1 - (1 - confidence_level) / 2)


def sample_size(
    population_size: int,
    confidence_level: float = 0.95,
    confidence_interval: float = 0.02,
) -> int:
    """
    Calculates the sample size for a finite population using Cochran's formula.

    The infinite-population size ``n_0 = z^2 * p * (1 - p) / e^2`` is computed
    first (with the worst-case proportion ``p = 0.5``) and then reduced by the
    finite-population correction ``n = n_0 / (1 + (n_0 - 1) / N)``.

    Args:
        population_size: The total size of the population. Must be at least 1.
        confidence_level: The desired confidence level, strictly between 0
            and 1 (e.g., 0.95 for 95%).
        confidence_interval: The desired margin of error expressed as a
            proportion. Must be greater than 0. Default is 0.02 (2%).

    Returns:
        The calculated sample size as an integer, rounded up.

    Raises:
        ValueError: If ``population_size`` is less than 1, if
            ``confidence_level`` is not strictly between 0 and 1, or if
            ``confidence_interval`` is not greater than 0.
    """
    # Validate up front: otherwise these inputs surface as ZeroDivisionError,
    # as an opaque NaN conversion error, or as a meaningless result.
    if population_size < 1:
        raise ValueError(
            f"population_size must be at least 1, got {population_size!r}"
        )
    if not 0 < confidence_level < 1:
        raise ValueError(
            "confidence_level must be strictly between 0 and 1, "
            f"got {confidence_level!r}"
        )
    # Negated comparison so that NaN is rejected as well.
    if not confidence_interval > 0:
        raise ValueError(
            f"confidence_interval must be greater than 0, got {confidence_interval!r}"
        )

    # For sample size calculation, we assume the worst-case variance, where p=0.5
    p = 0.5
    z_score = _get_z_score(confidence_level)
    # Calculate sample size for an infinite population
    n_0 = (z_score**2 * p * (1 - p)) / (confidence_interval**2)
    # Adjust sample size for the finite population
    n = n_0 / (1 + (n_0 - 1) / population_size)

    # Round up: a fractional observation cannot be sampled.
    return int(math.ceil(n))
31 changes: 31 additions & 0 deletions tests/test_sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import pytest
from sksampling import _get_z_score, sample_size


def test_sample_size_basic():
    """
    Tests the sample_size function with a few known sets of inputs and
    expected outputs.

    sample_size returns an exact integer (the ceiling of the corrected size),
    so results are compared with strict equality; a +/-1 tolerance would hide
    off-by-one rounding errors.
    """
    # Baseline case: n ~= 2344.64 before rounding up
    assert sample_size(100_000, 0.95, 0.02) == 2345
    # Test with a smaller population: n ~= 217.49 before rounding up
    assert sample_size(500, 0.95, 0.05) == 218
    # Test with higher confidence and smaller interval: n ~= 6239.03
    assert sample_size(10_000, 0.99, 0.01) == 6240
    # Test with a very large population: the finite-population correction
    # still keeps n (~= 383.999) just below the infinite-population value 385
    assert sample_size(1_000_000, 0.95, 0.05) == 384


def test_z_score():
    """
    Checks the _get_z_score helper against textbook Z-scores for the
    commonly used confidence levels.
    """
    tolerance = 1e-2
    # Confidence level -> approximate two-sided Z-score
    expected_by_level = {
        0.90: 1.645,
        0.95: 1.96,
        0.99: 2.58,
    }
    for level, expected in expected_by_level.items():
        assert _get_z_score(level) == pytest.approx(expected, abs=tolerance)
Loading