Source code for PandaSurvey
"""PandaSurvey includes two unique datasets for testing purposes: `People` and a sample study. The `People` file is from the 2010 US Census. The sample study is from a small survey performed at InContext Solutions in 2014 (specific survey details withheld)."""
import os
import pandas
def _path(name):
    """Return the location of the bundled data file called `name`.

    The result is the directory that contains this module joined with
    ``'data/' + name``. No check is made that the file actually exists.
    """
    package_dir = os.path.dirname(__file__)
    return os.path.join(package_dir, 'data/' + name)
def load_people():
    """Load the bundled `People` dataset and return it as a DataFrame.

    The data covers 9999 individuals and records age, disability status,
    marital status, race, and gender. The columns and the codes used in
    each of them are:

    - Age
        - Non-negative integer
        - May include zeros
    - Disability
        - 1: Disabled
        - 2: Not disabled
    - MarritalStatus
        - 1: Married
        - 2: Widowed
        - 3: Divorced
        - 4: Separated
        - 5: Never married or under 15 years old
    - Race
        - 1: White alone
        - 2: Black or African American alone
        - 3: American Indian alone
        - 4: Alaska Native alone
        - 5: American Indian and Alaska Native tribes specified; or American Indian or Alaska native, not specified and no other races
        - 6: Asian alone
        - 7: Native Hawaiian and Other Pacific Islander alone
        - 8: Some other race alone
        - 9: Two or more major race groups
    - Gender
        - 1: Male
        - 2: Female
    """
    people_csv = _path("People.csv")
    return pandas.read_csv(people_csv)
def load_sample_study():
    """Load the sample study as a DataFrame, without its weights.

    The dataset describes demographics in coded format for 2092
    respondents. The study consists of 7 cells, and the demographics
    considered include age, gender, income, hispanic, and race. The
    ``Weight`` column stored in the same file is removed before the
    frame is returned.
    """
    study = pandas.read_csv(_path("SampleStudy.csv"))
    # Only the coded demographics are returned; the weights are dropped.
    return study.drop('Weight', axis=1)
def load_sample_weights():
    """Load the per-respondent weights of the sample survey.

    The weights were calculated via a raking method previously
    implemented in R. They are read from the ``Weight`` column of the
    sample study file and returned as that single column.
    """
    return pandas.read_csv(_path("SampleStudy.csv"))['Weight']
def load_sample_proportions():
    """Load the target sample proportions that belong to the sample survey.

    The proportions are read from ``SampleWeights.csv``, where every line
    has the form ``demographic,coded value,proportion``. The result is a
    dict keyed by demographic name; each value is a dict that maps the
    coded value (as ``int``) to its target proportion (as ``float``).

    +-------------+-------------+-------------------+
    | Demographic | Coded Value | Target Proportion |
    +=============+=============+===================+
    | Age         | 1           | 0.07              |
    +-------------+-------------+-------------------+
    | Age         | 2           | 0.22              |
    +-------------+-------------+-------------------+
    | Age         | 3           | 0.2               |
    +-------------+-------------+-------------------+
    | Age         | 4           | 0.2               |
    +-------------+-------------+-------------------+
    | Age         | 5           | 0.21              |
    +-------------+-------------+-------------------+
    | Gender      | 1           | 0.5               |
    +-------------+-------------+-------------------+
    | Gender      | 2           | 0.5               |
    +-------------+-------------+-------------------+
    | Income      | 1           | 0.17              |
    +-------------+-------------+-------------------+
    | Income      | 2           | 0.21              |
    +-------------+-------------+-------------------+
    | Income      | 3           | 0.25              |
    +-------------+-------------+-------------------+
    | Income      | 4           | 0.16              |
    +-------------+-------------+-------------------+
    | Income      | 5           | 0.11              |
    +-------------+-------------+-------------------+
    | Hispanic    | 1           | 0.09              |
    +-------------+-------------+-------------------+
    | Hispanic    | 2           | 0.91              |
    +-------------+-------------+-------------------+
    | Race        | 0           | 0.15              |
    +-------------+-------------+-------------------+
    | Race        | 1           | 0.85              |
    +-------------+-------------+-------------------+
    """
    proportions = {}
    with open(_path("SampleWeights.csv")) as handle:
        for row in handle:
            demographic, code, target = row.split(',')
            # setdefault creates the inner dict the first time a
            # demographic is seen and reuses it afterwards.
            proportions.setdefault(demographic, {})[int(code)] = float(target)
    return proportions