Source code for PandaSurvey

"""PandaSurvey includes two unique datasets for testing purposes: `People` and a sample study. The `People` file is from the 2010 US Census. The sample study is from a small survey performed at InContext Solutions in 2014 (specific survey details withheld)."""

import os
import pandas


def _path(name):
    """Build the path of a bundled data file.

    The result is the directory containing this module joined with
    ``'data/'`` followed by `name`.
    """
    # Data files live in a ``data`` folder next to this module.
    module_dir = os.path.dirname(__file__)
    relative = 'data/' + name
    return os.path.join(module_dir, relative)


def load_people():
    """Load the `People` dataset into a DataFrame.

    The file holds 9999 individuals with age, disability status, marital
    status, race, and gender demographic information. Columns and their
    codes:

    - Age

      - Non-negative integer
      - May include zeros

    - Disability

      - 1: Disabled
      - 2: Not disabled

    - MarritalStatus

      - 1: Married
      - 2: Widowed
      - 3: Divorced
      - 4: Separated
      - 5: Never married or under 15 years old

    - Race

      - 1: White alone
      - 2: Black or African American alone
      - 3: American Indian alone
      - 4: Alaska Native alone
      - 5: American Indian and Alaska Native tribes specified; or American
        Indian or Alaska native, not specified and no other races
      - 6: Asian alone
      - 7: Native Hawaiian and Other Pacific Islander alone
      - 8: Some other race alone
      - 9: Two or more major race groups

    - Gender

      - 1: Male
      - 2: Female
    """
    people_csv = _path("People.csv")
    people = pandas.read_csv(people_csv)
    return people
def load_sample_study():
    """Load the sample study demographics as a DataFrame.

    The dataset describes demographics in coded format from 2092
    respondents. The study consists of 7 cells, and the demographics
    considered include age, gender, income, hispanic, and race. The
    ``Weight`` column stored in the file is removed from the result.
    """
    study = pandas.read_csv(_path("SampleStudy.csv"))
    # Weights are exposed separately, so strip them from the demographics.
    study.drop('Weight', axis=1, inplace=True)
    return study
def load_sample_weights():
    """Load the per-respondent weights of the sample survey.

    The weights were calculated via a raking method previously implemented
    in R; this function returns the ``Weight`` column of the sample study
    file.
    """
    study_csv = _path("SampleStudy.csv")
    return pandas.read_csv(study_csv)['Weight']
def load_sample_proportions():
    """Returns the target sample proportions that correspond to the sample survey.

    Each non-blank line of ``SampleWeights.csv`` is split on commas into
    three fields: demographic name, coded value, and target proportion.
    Blank lines are skipped.

    +-------------+-------------+-------------------+
    | Demographic | Coded Value | Target Proportion |
    +=============+=============+===================+
    | Age         | 1           | 0.07              |
    +-------------+-------------+-------------------+
    | Age         | 2           | 0.22              |
    +-------------+-------------+-------------------+
    | Age         | 3           | 0.2               |
    +-------------+-------------+-------------------+
    | Age         | 4           | 0.2               |
    +-------------+-------------+-------------------+
    | Age         | 5           | 0.21              |
    +-------------+-------------+-------------------+
    | Gender      | 1           | 0.5               |
    +-------------+-------------+-------------------+
    | Gender      | 2           | 0.5               |
    +-------------+-------------+-------------------+
    | Income      | 1           | 0.17              |
    +-------------+-------------+-------------------+
    | Income      | 2           | 0.21              |
    +-------------+-------------+-------------------+
    | Income      | 3           | 0.25              |
    +-------------+-------------+-------------------+
    | Income      | 4           | 0.16              |
    +-------------+-------------+-------------------+
    | Income      | 5           | 0.11              |
    +-------------+-------------+-------------------+
    | Hispanic    | 1           | 0.09              |
    +-------------+-------------+-------------------+
    | Hispanic    | 2           | 0.91              |
    +-------------+-------------+-------------------+
    | Race        | 0           | 0.15              |
    +-------------+-------------+-------------------+
    | Race        | 1           | 0.85              |
    +-------------+-------------+-------------------+

    :returns: nested dict of the form
        ``{demographic: {coded value (int): target proportion (float)}}``
    :raises ValueError: if a non-blank line does not contain exactly three
        comma-separated fields, or the coded value / proportion cannot be
        converted to ``int`` / ``float``
    """
    weights = {}
    with open(_path("SampleWeights.csv")) as csv_in:
        for line in csv_in:
            if not line.strip():
                # A stray empty line would otherwise break the three-way
                # unpack below with an unhelpful ValueError.
                continue
            demo, category, proportion = line.split(',')
            # int()/float() tolerate surrounding whitespace, including the
            # trailing newline left on the last field.
            weights.setdefault(demo, {})[int(category)] = float(proportion)
    return weights