leomaurodesenv · leomaurodesenv · Jun 28, 2025 · Jun 28, 2025 · Jun 28, 2025 · Jun 28, 2025
@@ -0,0 +1,30 @@
+# Continuous integration: runs the pre-commit hooks and the pytest suite on
+# pushes to main, on pull-request updates, and on manual dispatch.
+name: "Continuous Integration"
+run-name: Running tests on "${{ github.ref }}" by "${{ github.actor }}"
+on:
+  push:
+    branches:
+      - 'main'
+  pull_request:
+    types:
+      - opened
+      - reopened
+      - synchronize
+  workflow_dispatch:
+
+# Least privilege: both jobs only check out and read the repository, so the
+# GITHUB_TOKEN does not need any write scope.
+permissions:
+  contents: read
+
+jobs:
+  # Run pre-commit hooks
+  pre-commit:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      # Create the project environment before invoking the hooks.
+      - run: uv sync
+      - run: uv run pre-commit run --all-files
+  # Run tests
+  tests:
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v5
+      # Create the project environment before running the test suite.
+      - run: uv sync
+      - run: uv run pytest
@@ -182,9 +182,9 @@ cython_debug/
 .abstra/
 
 # Visual Studio Code
-#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore 
+#  Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
 #  that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
-#  and can be added to the global gitignore or merged into this file. However, if you prefer, 
+#  and can be added to the global gitignore or merged into this file. However, if you prefer,
 #  you could uncomment the following to ignore the entire vscode folder
 # .vscode/
 

@@ -0,0 +1,16 @@
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  # General-purpose file hygiene and syntax checks.
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: check-yaml
+      - id: check-added-large-files
+      - id: check-json
+      - id: check-ast
+  # Code formatting with Ruff, limited to Python sources and stub files.
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.12.1
+    hooks:
+      - id: ruff-format
+        types_or: [python, pyi]
@@ -0,0 +1 @@
+3.10
@@ -0,0 +1,6 @@
+def main():
+    """Print the package greeting to standard output."""
+    greeting = "Hello from scikit-sampling!"
+    print(greeting)
+
+
+# Only run the greeting when the file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,40 @@
+# Project metadata, dependencies and tool configuration for scikit-sampling.
+[project]
+name = "scikit-sampling"
+# SPDX license expression; it replaces the deprecated "License ::" classifier.
+license = "MIT"
+readme = "README.md"
+version = "0.1.0"
+requires-python = ">=3.10"
+description = "A set of python modules for dataset sampling"
+authors = [
+  {name = "Leonardo Moraes", email = "leomaurodesenv@users.noreply.github.com"}
+]
+classifiers = [
+  "Intended Audience :: Science/Research",
+  "Intended Audience :: Developers",
+  "Programming Language :: Python",
+  "Topic :: Scientific/Engineering",
+  "Development Status :: 4 - Beta",
+  "Programming Language :: Python :: 3",
+  "Programming Language :: Python :: 3.10",
+  "Programming Language :: Python :: 3.11",
+  "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
+]
+
+# Runtime dependencies.
+dependencies = [
+    "scipy>=1.15.0,<2",
+]
+
+# `url` is not a key of the [project] table; project links are declared in
+# the [project.urls] sub-table. It must follow every plain [project] key.
+[project.urls]
+Repository = "https://github.com/leomaurodesenv/scikit-sampling"
+
+# Development-only tooling.
+[dependency-groups]
+dev = [
+    "commitizen>=4.8.3,<5",
+    "pre-commit>=4.2.0,<5",
+    "pytest>=8.4.1,<9",
+]
+
+# Put the repository root on sys.path when pytest runs.
+[tool.pytest.ini_options]
+pythonpath = [
+  "."
+]
@@ -0,0 +1,44 @@
+import math
+import scipy.stats as st
+
+
+def _get_z_score(confidence_level: float) -> float:
+    """
+    Computes the two-sided Z-score matching a confidence level.
+
+    Args:
+        confidence_level: The confidence level as a float (e.g., 0.95 for 95%).
+
+    Returns:
+        The standard normal quantile that leaves half of the remaining
+        probability (1 - confidence_level) in the upper tail.
+    """
+    # Probability mass left outside the confidence band, split over two tails.
+    tail_mass = 1 - confidence_level
+    upper_quantile = 1 - tail_mass / 2
+    return st.norm.ppf(upper_quantile)
+
+
+def sample_size(
+    population_size: int,
+    confidence_level: float = 0.95,
+    confidence_interval: float = 0.02,
+) -> int:
+    """
+    Calculates the sample size for a finite population using Cochran's formula.
+
+    Args:
+        population_size: The total size of the population. Must be at least 1.
+        confidence_level: The desired confidence level (e.g., 0.95 for 95%).
+                          Must lie strictly between 0 and 1.
+        confidence_interval: The desired confidence interval (margin of error).
+                             Must be greater than 0. Default is 0.02 (2%).
+
+    Returns:
+        The calculated sample size as an integer, never larger than
+        population_size.
+
+    Raises:
+        ValueError: If any argument is outside its valid range.
+    """
+
+    # Reject inputs that would otherwise divide by zero or produce NaN/inf
+    # deep inside the formula. The negated comparisons also reject NaN.
+    if not population_size >= 1:
+        raise ValueError(
+            f"population_size must be at least 1, got {population_size!r}"
+        )
+    if not 0 < confidence_level < 1:
+        raise ValueError(
+            "confidence_level must be strictly between 0 and 1, "
+            f"got {confidence_level!r}"
+        )
+    if not confidence_interval > 0:
+        raise ValueError(
+            f"confidence_interval must be greater than 0, got {confidence_interval!r}"
+        )
+
+    # For sample size calculation, we assume the worst-case variance, where p=0.5
+    p = 0.5
+    z_score = _get_z_score(confidence_level)
+    # Calculate sample size for an infinite population
+    n_0 = (z_score**2 * p * (1 - p)) / (confidence_interval**2)
+    # Adjust sample size for the finite population
+    n = n_0 / (1 + (n_0 - 1) / population_size)
+
+    # Round up to whole units; the clamp guards against floating-point
+    # round-off pushing the result above the population itself.
+    return min(int(math.ceil(n)), population_size)
@@ -0,0 +1,31 @@
+import pytest
+from sksampling import _get_z_score, sample_size
+
+
+def test_sample_size_basic():
+    """
+    Checks sample_size against reference results for several combinations of
+    population size, confidence level and margin of error, accepting a
+    difference of at most one unit.
+    """
+    tolerance = 1
+    # Each entry: (population_size, confidence_level, confidence_interval), expected
+    reference_cases = [
+        # Baseline reference case
+        ((100_000, 0.95, 0.02), 2345),
+        # Smaller population
+        ((500, 0.95, 0.05), 218),
+        # Higher confidence and smaller interval
+        ((10_000, 0.99, 0.01), 6239),
+        # Very large population, approaching the infinite case
+        ((1_000_000, 0.95, 0.05), 385),
+    ]
+    for arguments, expected in reference_cases:
+        assert sample_size(*arguments) == pytest.approx(expected, abs=tolerance)
+
+
+def test_z_score():
+    """
+    Checks the _get_z_score helper against the textbook Z-scores of the
+    90%, 95% and 99% confidence levels.
+    """
+    tolerance = 1e-2
+    # Confidence level -> approximate two-sided Z-score
+    textbook_scores = {
+        0.90: 1.645,
+        0.95: 1.96,
+        0.99: 2.58,
+    }
+    for level, expected in textbook_scores.items():
+        assert _get_z_score(level) == pytest.approx(expected, abs=tolerance)