import * as React from 'react'
  /* @jsx mdx */
import { mdx } from '@mdx-js/react';
/* @jsxRuntime classic */

/* @jsx mdx */

import { Vimeo } from "@swizec/gatsby-theme-course-platform";
// Frontmatter exported alongside the document; this page declares none.
export const _frontmatter = {};
// Layout element type used as the outermost JSX tag of the document.
const MDXLayout = "wrapper";
// Props spread onto the layout element each time the document renders.
const layoutProps = { _frontmatter };
export default function MDXContent({
  components,
  ...props
}) {
  return <MDXLayout {...layoutProps} {...props} components={components} mdxType="MDXLayout">

    <h1 {...{
      "id": "asynchronously-load-data"
    }}>{`Asynchronously load data`}</h1>
    <Vimeo id={429666375} mdxType="Vimeo" />
    <p>{`Great! We have a preloader. Time to load some data.`}</p>
    <p>{`We'll use D3's built-in data loading methods and tie their promises into
React's component lifecycle. You could talk to a REST API in the same way.
Neither D3 nor React cares what the datasource is.`}</p>
    <p>{`First, you need some data.`}</p>
    <p>{`Our dataset comes from a few sources. Tech salaries are from
`}<a parentName="p" {...{
        "href": "https://h1bdata.info"
      }}>{`h1bdata.info`}</a>{`, median household incomes come from the US
census data, and I got US geo map info from Mike Bostock's github repositories.
Some elbow grease and python scripts tied them all together.`}</p>
    <p>{`You can read about the scraping on my blog
`}<a parentName="p" {...{
        "href": "https://swizec.com/blog/place-names-county-names-geonames/swizec/7083"
      }}>{`here`}</a>{`,
`}<a parentName="p" {...{
        "href": "https://swizec.com/blog/facts-us-household-income/swizec/7075"
      }}>{`here`}</a>{`, and
`}<a parentName="p" {...{
        "href": "https://swizec.com/blog/livecoding-24-choropleth-react-js/swizec/7078"
      }}>{`here`}</a>{`.
But it's not the subject of this course.`}</p>
    <h2 {...{
      "id": "step-0-get-the-data"
    }}>{`Step 0: Get the data`}</h2>
    <Vimeo id={429666495} mdxType="Vimeo" />
    <p>{`Download the data files from
`}<a parentName="p" {...{
        "href": "https://github.com/Swizec/reactdataviz-project/tree/master/public/data"
      }}>{`my walkthrough repository on Github`}</a>{`.
Put them in your `}<inlineCode parentName="p">{`public/data`}</inlineCode>{` directory.`}</p>
    <h2 {...{
      "id": "step-1-prep-appjs"
    }}>{`Step 1: Prep App.js`}</h2>
    <Vimeo id={429666967} mdxType="Vimeo" />
    <p>{`Let's set up our `}<inlineCode parentName="p">{`App`}</inlineCode>{` component first. That way you'll see results as soon
as data loading starts to work.`}</p>
    <p>{`Start by importing our data loading method - `}<inlineCode parentName="p">{`loadAllData`}</inlineCode>{` - and both D3 and
Lodash. We'll need them later.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/App.js
import React, { useState, useEffect } from "react"
// Insert the line(s) between here...
import * as d3 from "d3"
import _ from "lodash"
// ...and here.

import Preloader from "./components/Preloader"
// Insert the line(s) between here...
import { loadAllData } from "./DataHandling"
// ...and here.
`}</code></pre>
    <p>{`You already know about default imports. Importing with `}<inlineCode parentName="p">{`{}`}</inlineCode>{` is how we import
named exports. That lets us get multiple things from the same file. You'll see
the export side in Step 2.`}</p>
    <p>{`Don't worry about the missing `}<inlineCode parentName="p">{`DataHandling`}</inlineCode>{` file. It's coming soon.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/App.js
function App() {
    const [techSalaries, setTechSalaries] = useState([]);

    // Insert the line(s) between here...
    const [medianIncomes, setMedianIncomes] = useState([]);
    const [countyNames, setCountyNames] = useState([]);

    async function loadData() {
        const data = await loadAllData();

        const { techSalaries, medianIncomes, countyNames } = data;

        setTechSalaries(techSalaries);
        setMedianIncomes(medianIncomes);
        setCountyNames(countyNames);
    }

    useEffect(() => {
        loadData();
    }, []);
    // ...and here.
`}</code></pre>
    <p>{`We initiate data loading inside the `}<inlineCode parentName="p">{`useEffect`}</inlineCode>{` hook. It
fires when React first mounts our component into the DOM.`}</p>
    <p>{`I like to tie data loading to component mounts because it means you aren't
making requests you'll never use. In a bigger app, you'd use Redux, MobX, or
similar to decouple loading from rendering. Many reasons why.`}</p>
    <p>{`To load our data, we call the `}<inlineCode parentName="p">{`loadAllData`}</inlineCode>{` function, then use state setters
once its promise resolves. This updates `}<inlineCode parentName="p">{`App`}</inlineCode>{`'s state and triggers a re-render, which
updates our entire visualization via props.`}</p>
    <p>{`We also add two more pieces of state: `}<inlineCode parentName="p">{`countyNames`}</inlineCode>{` and
`}<inlineCode parentName="p">{`medianIncomes`}</inlineCode>{`. Defining what's in your component state in advance makes your
code easier to read. People, including you, know what to expect.`}</p>
    <p>{`Let's change rendering to show a message when our data finishes
loading.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/App.js
if (techSalaries.length < 1) {
  return <Preloader />
} else {
  return (
    <div className="App container">
      <h1>Loaded {techSalaries.length} salaries</h1>
    </div>
  )
}
`}</code></pre>
    <p>{`We added a `}<inlineCode parentName="p">{`container`}</inlineCode>{` class to the main `}<inlineCode parentName="p">{`<div>`}</inlineCode>{` and an `}<inlineCode parentName="p">{`<h1>`}</inlineCode>{` tag that shows
how many datapoints there are. You can use any valid JavaScript in curly braces
`}<inlineCode parentName="p">{`{}`}</inlineCode>{` and JSX will evaluate it. By convention we only use that ability to
calculate display values.`}</p>
    <p>{`You should now get an error overlay.`}</p>
    <p><img parentName="p" {...{
        "src": "https://raw.githubusercontent.com/Swizec/react-d3js-es6-ebook/2018-version/manuscript/resources/images/es6v2/datahandling-error.png",
        "alt": "DataHandling.js not found error overlay"
      }}></img></p>
    <p>{`These nice error overlays come with `}<inlineCode parentName="p">{`create-react-app`}</inlineCode>{` and make your code
easier to debug. No hunting around in the terminal to see compilation errors.`}</p>
    <p>{`Let's build that file and fill it with our data loading logic.`}</p>
    <h2 {...{
      "id": "step-2-prep-data-parsing-functions"
    }}>{`Step 2: Prep data parsing functions`}</h2>
    <Vimeo id={429667128} mdxType="Vimeo" />
    <p>{`We're putting data loading logic in a separate file from `}<inlineCode parentName="p">{`App.js`}</inlineCode>{` because it's
a bunch of functions that work together and don't have much to do with the
`}<inlineCode parentName="p">{`App`}</inlineCode>{` component itself.`}</p>
    <p>{`We start with two imports and five data parsing functions:`}</p>
    <ul>
      <li parentName="ul"><inlineCode parentName="li">{`cleanIncome`}</inlineCode>{`, which parses each row of household income data`}</li>
      <li parentName="ul"><inlineCode parentName="li">{`dateParse`}</inlineCode>{`, which we use for parsing dates`}</li>
      <li parentName="ul"><inlineCode parentName="li">{`cleanSalary`}</inlineCode>{`, which parses each row of salary data`}</li>
      <li parentName="ul"><inlineCode parentName="li">{`cleanUSStateName`}</inlineCode>{`, which parses US state names`}</li>
      <li parentName="ul"><inlineCode parentName="li">{`cleanCounty`}</inlineCode>{`, which parses each row of county name data`}</li>
    </ul>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/DataHandling.js
import * as d3 from "d3"
import _ from "lodash"

const cleanIncome = (d) => ({
  countyName: d["Name"],
  USstate: d["State"],
  medianIncome: Number(d["Median Household Income"]),
  lowerBound: Number(d["90% CI Lower Bound"]),
  upperBound: Number(d["90% CI Upper Bound"]),
})

const dateParse = d3.timeParse("%m/%d/%Y")

const cleanSalary = (d) => {
  if (!d["base salary"] || Number(d["base salary"]) > 300000) {
    return null
  }

  return {
    employer: d.employer,
    submit_date: dateParse(d["submit date"]),
    start_date: dateParse(d["start date"]),
    case_status: d["case status"],
    job_title: d["job title"],
    clean_job_title: d["job title"],
    base_salary: Number(d["base salary"]),
    city: d["city"],
    USstate: d["state"],
    county: d["county"],
    countyID: d["countyID"],
  }
}

const cleanUSStateName = (d) => ({
  code: d.code,
  id: Number(d.id),
  name: d.name,
})

const cleanCounty = (d) => ({
  id: Number(d.id),
  name: d.name,
})
`}</code></pre>
    <p>{`You'll see those `}<inlineCode parentName="p">{`d3`}</inlineCode>{` and `}<inlineCode parentName="p">{`lodash`}</inlineCode>{` imports a lot.`}</p>
    <p>{`Our data parsing functions all follow the same approach: Take a row of data as
`}<inlineCode parentName="p">{`d`}</inlineCode>{`, return a dictionary with nicer key names, cast any numbers or dates into
appropriate formats. They all start as strings.`}</p>
    <p>{`Doing all this parsing now keeps the rest of our codebase clean. Handling data
is always messy. You want to contain that mess as much as possible.`}</p>
    <h2 {...{
      "id": "step-3-load-the-datasets"
    }}>{`Step 3: Load the datasets`}</h2>
    <Vimeo id={429667304} mdxType="Vimeo" />
    <p>{`Now we can use D3 to load our data with fetch requests.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/DataHandling.js
export const loadAllData = async () => {
  const datasets = await Promise.all([
    d3.json("data/us.json"),
    d3.csv("data/us-county-names-normalized.csv", cleanCounty),
    d3.csv("data/county-median-incomes.csv", cleanIncome),
    d3.csv("data/h1bs-2012-2016-shortened.csv", cleanSalary),
    d3.tsv("data/us-state-names.tsv", cleanUSStateName),
  ]) //.then(([us, countyNames, medianIncomes, techSalaries, USstateNames]) => {})
}
`}</code></pre>
    <p>{`In version 5, D3 updated its data loading methods to use promises instead of
callbacks. You can load a single file using
`}<inlineCode parentName="p">{`d3.csv("filename").then(data => ....`}</inlineCode>{`. The promise resolves with your data, or
throws an error.`}</p>
    <p>{`Each `}<inlineCode parentName="p">{`d3.csv`}</inlineCode>{` call makes a fetch request, parses the fetched CSV file into an
array of JavaScript dictionaries, and passes each row through the provided
cleanup function. We pass all median incomes through `}<inlineCode parentName="p">{`cleanIncome`}</inlineCode>{`, salaries
through `}<inlineCode parentName="p">{`cleanSalary`}</inlineCode>{`, etc.`}</p>
    <p>{`To load multiple files, we use `}<inlineCode parentName="p">{`Promise.all`}</inlineCode>{` with a list of unresolved
promises. Once they all resolve, `}<inlineCode parentName="p">{`await`}</inlineCode>{` hands us a list of results. We use
array destructuring to expand that list into our respective datasets before
running some more logic to tie them together.`}</p>
    <p>{`D3 supports formats like `}<inlineCode parentName="p">{`json`}</inlineCode>{`, `}<inlineCode parentName="p">{`csv`}</inlineCode>{`, `}<inlineCode parentName="p">{`tsv`}</inlineCode>{`, `}<inlineCode parentName="p">{`text`}</inlineCode>{`, and `}<inlineCode parentName="p">{`xml`}</inlineCode>{` out of the
box. You can make it work with custom data sources through the underlying
`}<inlineCode parentName="p">{`request`}</inlineCode>{` API.`}</p>
    <p>{`PS: we're using the shortened salary dataset to make page reloads faster while
building our project.`}</p>
    <h2 {...{
      "id": "step-4-tie-the-datasets-together"
    }}>{`Step 4: Tie the datasets together`}</h2>
    <Vimeo id={429667661} mdxType="Vimeo" />
    <p>{`If you add a `}<inlineCode parentName="p">{`console.log`}</inlineCode>{` after the `}<inlineCode parentName="p">{`await`}</inlineCode>{` above, you'll see a bunch of
data. Each entry - `}<inlineCode parentName="p">{`us`}</inlineCode>{`, `}<inlineCode parentName="p">{`countyNames`}</inlineCode>{`, `}<inlineCode parentName="p">{`medianIncomes`}</inlineCode>{`, `}<inlineCode parentName="p">{`techSalaries`}</inlineCode>{`,
`}<inlineCode parentName="p">{`USstateNames`}</inlineCode>{` - holds a parsed dataset from the corresponding file.`}</p>
    <p>{`To tie them together and prepare a dictionary for `}<inlineCode parentName="p">{`setState`}</inlineCode>{` back in the `}<inlineCode parentName="p">{`App`}</inlineCode>{`
component, we need to add some logic. We're building a dictionary of county
household incomes and removing any empty salaries.`}</p>
    <pre><code parentName="pre" {...{
        "className": "language-javascript"
      }}>{`// src/DataHandling.js
let [us, countyNames, medianIncomes, techSalaries, USstateNames] = datasets
    let medianIncomesMap = {};

    medianIncomes.filter(d => _.find(countyNames,
                                     {name: d['countyName']}))
                 .forEach((d) => {
                     d['countyID'] = _.find(countyNames,
                                            {name: d['countyName']}).id;
                     medianIncomesMap[d.countyID] = d;
                 });

    techSalaries = techSalaries.filter(d => !_.isNull(d));

    return {
        usTopoJson: us,
        countyNames: countyNames,
        medianIncomes: medianIncomesMap,
        medianIncomesByCounty: _.groupBy(medianIncomes, 'countyName'),
        medianIncomesByUSState: _.groupBy(medianIncomes, 'USstate'),
        techSalaries: techSalaries,
        USstateNames: USstateNames
    }
}

`}</code></pre>
    <p>{`Building the income map looks weird because of indentation, but it's not that
bad. We `}<inlineCode parentName="p">{`filter`}</inlineCode>{` the `}<inlineCode parentName="p">{`medianIncomes`}</inlineCode>{` array to discard any incomes whose
`}<inlineCode parentName="p">{`countyName`}</inlineCode>{` we can't find. I made sure they were all unique when I built the
datasets.`}</p>
    <p>{`We walk through the filtered array with a `}<inlineCode parentName="p">{`forEach`}</inlineCode>{`, find the correct
`}<inlineCode parentName="p">{`countyID`}</inlineCode>{`, and add each entry to `}<inlineCode parentName="p">{`medianIncomesMap`}</inlineCode>{`. When we're done, we have
a large dictionary that maps county ids to their household income data.`}</p>
    <p>{`Then we filter `}<inlineCode parentName="p">{`techSalaries`}</inlineCode>{` to remove any empty values where the
`}<inlineCode parentName="p">{`cleanSalary`}</inlineCode>{` function returned `}<inlineCode parentName="p">{`null`}</inlineCode>{`. That happens when a salary is either
undefined or absurdly high.`}</p>
    <p>{`When our data is ready, we return a dictionary of the new
datasets. To make future access quicker, we use `}<inlineCode parentName="p">{`_.groupBy`}</inlineCode>{` to build dictionary
maps of median incomes by county name and by US state.`}</p>
    <p>{`You should now see how many salary entries the shortened dataset contains.`}</p>
    <p><img parentName="p" {...{
        "src": "https://raw.githubusercontent.com/Swizec/react-d3js-es6-ebook/2018-version/manuscript/resources/images/es6v2/data-loaded-screenshot.png",
        "alt": "Data loaded screenshot"
      }}></img></p>
    <p>{`If that didn't work, try comparing your changes to this
`}<a parentName="p" {...{
        "href": "https://github.com/Swizec/reactdataviz-project/commit/dbc95baf4a3bb7c4ebd0b77a418d9900118ba686"
      }}>{`diff on Github`}</a>{`.`}</p>

    </MDXLayout>;
}
;
// Flag the exported function as a compiled MDX document. The flag is not read
// anywhere in this file — presumably MDX tooling checks it; confirm before relying on it.
MDXContent.isMDXComponent = true;
      