Skip to content

Clean Datasets

Pipeline outputs are stored under data/clean in the data store. geojsonl/ holds line-delimited GeoJSON files for each geography type; these files conform to the schema defined by the tax_credit_geography table. geoparquet/ holds GeoParquet files for each geography type, which conform to the same schema. mapbox/ holds metadata generated by syncing the geojsonl geographies with remote Mapbox tilesets. Within that directory, mapbox/mapbox_tilesets.json summarizes the current settings for each active tileset under the application's configured Mapbox account, for example:

[
    {
        "bounds": [
            -180,
            -21.943046,
            180,
            74.019543
        ],
        "center": [
            -84.43575,
            36.93854035014232,
            5
        ],
        "created": 1703237021110,
        "created_by_client": "mts",
        "filesize": 28541591,
        "format": "pbf",
        "id": "<username>.cc_states",
        "mapbox_logo": true,
        "maxzoom": 5,
        "minzoom": 1,
        "modified": 1714859844459,
        "mts": true,
        "name": "states",
        "private": true,
        "scheme": "xyz",
        "tilejson": "2.2.0",
        "tiles": [
            "http://a.tiles.mapbox.com/v4/<username>.cc_states/{z}/{x}/{y}.vector.pbf?access_token=<secret_token>",
            "http://b.tiles.mapbox.com/v4/<username>.cc_states/{z}/{x}/{y}.vector.pbf?access_token=<secret_token>"
        ],
        "vector_layers": [
            {
                "fields": {
                    "as_of": "string",
                    "fips": "string",
                    "fips_pattern": "string",
                    "geography_type": "string",
                    "name": "string",
                    "population": "number",
                    "population_strategy": "string",
                    "published_on": "string",
                    "source": "string"
                },
                "id": "cc_states",
                "maxzoom": 22,
                "minzoom": 1,
                "source": "<username>.cc_states",
                "source_name": "states"
            }
        ],
        "webpage": "https://studio.mapbox.com/tilesets/<username>.cc_states"
    }
]

Finally, test/ holds datasets used for pipeline testing, and the contents of analysis/ are auto-generated whenever a Jupyter notebook saves output.