# archive.git - Gitblit

import numpy as np
import pytest
 
import pandas as pd
from pandas import (
    DataFrame,
    Series,
)
import pandas._testing as tm
 
 
@pytest.fixture
def m():
    """Return the count used by ``keys`` for its random draws and row stride."""
    n_random_keys = 5
    return n_random_keys
 
 
@pytest.fixture
def n():
    """Return the number of rows generated by the ``vals`` fixture."""
    n_rows = 100
    return n_rows
 
 
@pytest.fixture
def cols():
    """Return the column labels; all but the last are used as index levels."""
    column_names = ["jim", "joe", "jolie", "joline", "jolia"]
    return column_names
 
 
@pytest.fixture
def vals(n):
    """Return ``n`` random rows, each a 5-tuple of column values.

    The columns are, in order: integers in ``[0, 10)``, letters from
    ``"abcdefghij"``, timestamps from a 10-day range starting 2014-10-09,
    letters from ``"ZYXWVUTSRQ"``, and standard-normal floats.

    Parameters
    ----------
    n : int
        Number of rows to generate.

    Returns
    -------
    list of tuple
        One tuple per row.
    """
    # A single seeded generator is shared by all columns.  Creating a fresh
    # ``default_rng(2)`` for each column would replay the same stream every
    # time, tying the columns together instead of producing independent
    # (yet still reproducible) data.
    rng = np.random.default_rng(2)
    columns = [
        rng.integers(0, 10, n),
        rng.choice(list("abcdefghij"), n),
        rng.choice(pd.date_range("20141009", periods=10).tolist(), n),
        rng.choice(list("ZYXWVUTSRQ"), n),
        rng.standard_normal(n),
    ]
    # Transpose the column arrays into row tuples.
    return list(zip(*columns))
 
 
@pytest.fixture
def keys(n, m, vals):
    """Return the 4-level lookup keys exercised by the test.

    The result is ``m`` randomly drawn keys followed by keys taken from
    every ``n // m``-th row of ``vals`` (with the last value of the row
    dropped).  Each random level is drawn from 11 candidates, one more than
    the 10 candidates used for the matching column in ``vals``.

    Parameters
    ----------
    n : int
        Number of rows in ``vals``.
    m : int
        Number of random keys, and divisor for the row stride.
    vals : list of tuple
        Rows produced by the ``vals`` fixture.

    Returns
    -------
    list of tuple
    """
    # A single seeded generator is shared by all levels.  Creating a fresh
    # ``default_rng(2)`` for each level would replay the same stream every
    # time, tying the levels of each random key together.
    rng = np.random.default_rng(2)
    levels = [
        rng.integers(0, 11, m),
        rng.choice(list("abcdefghijk"), m),
        rng.choice(pd.date_range("20141009", periods=11).tolist(), m),
        rng.choice(list("ZYXWVUTSRQP"), m),
    ]
    random_keys = list(zip(*levels))
    # Keys copied from actual rows, so these are present in the data.
    known_keys = [row[:-1] for row in vals[:: n // m]]
    return random_keys + known_keys
 
 
# covers both unique index and non-unique index
@pytest.fixture
def df(vals, cols):
    """Return a DataFrame built from the ``vals`` rows, labelled by ``cols``."""
    return DataFrame(data=vals, columns=cols)
 
 
@pytest.fixture
def a(df):
    """Return ``df`` stacked on top of itself, so every row appears twice."""
    return pd.concat([df] * 2)
 
 
@pytest.fixture
def b(df, cols):
    """Return ``df`` with rows deduplicated on every column but the last."""
    key_columns = cols[:-1]
    return df.drop_duplicates(subset=key_columns)
 
 
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("frame_fixture", ["a", "b"])
def test_multiindex_get_loc(request, lexsort_depth, keys, frame_fixture, cols):
    """Check ``in`` and ``.loc`` on a MultiIndex for every prefix of each key.

    The frame named by ``frame_fixture`` is sorted by the first
    ``lexsort_depth`` columns (or merely shallow-copied when the depth is 0)
    and indexed by all columns but the last.  For every key and every prefix
    of it, membership in the index and the ``.loc`` result are compared with
    the rows selected by an explicit boolean mask on the un-indexed frame.
    """
    # GH7724, GH2646

    source = request.getfixturevalue(frame_fixture)
    if lexsort_depth != 0:
        df = source.sort_values(by=cols[:lexsort_depth])
    else:
        df = source.copy(deep=False)

    mi = df.set_index(cols[:-1])
    assert mi.index._lexsort_depth >= lexsort_depth

    for key in keys:
        # Rows of ``df`` that match the prefix examined so far.
        mask = np.ones(len(df), dtype=bool)

        # Walk through every prefix of the key, one level at a time.
        for level, value in enumerate(key):
            depth = level + 1
            prefix = key[:depth]
            mask &= df.iloc[:, level] == value

            if not mask.any():
                # Keep iterating: the mask stays empty, so each longer
                # prefix is asserted to be absent as well.
                assert prefix not in mi.index
                continue

            assert prefix in mi.index
            expected = df[mask].copy(deep=False)

            if depth == len(key):
                # Full key: index by every level column.
                ret = expected.set_index(cols[:-1], inplace=True)
                assert ret is None
                if len(expected) != 1:
                    # Several matching rows: ``.loc`` yields a frame.
                    tm.assert_frame_equal(mi.loc[prefix], expected)
                else:
                    # Exactly one matching row: ``.loc`` yields a Series.
                    expected = Series(
                        expected["jolia"].values,
                        name=expected.index[0],
                        index=["jolia"],
                    )
                    tm.assert_series_equal(mi.loc[prefix], expected)
            else:
                # Partial key: drop the consumed level columns and index
                # by the remaining ones.
                ret = expected.drop(cols[:depth], axis=1, inplace=True)
                assert ret is None
                ret = expected.set_index(cols[depth:-1], inplace=True)
                assert ret is None
                tm.assert_frame_equal(mi.loc[prefix], expected)