{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "___\n",
    "\n",
    "<a href='http://www.pieriandata.com'> <img src='../Pierian_Data_Logo.png' /></a>\n",
    "___"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# Merging, Joining, and Concatenating\n",
    "\n",
    "There are 3 main ways of combining DataFrames together: Merging, Joining and Concatenating. In this lecture we will discuss these 3 methods with examples.\n",
    "\n",
    "____"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Example DataFrames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                        'B': ['B0', 'B1', 'B2', 'B3'],\n",
    "                        'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                        'D': ['D0', 'D1', 'D2', 'D3']},\n",
    "                        index=[0, 1, 2, 3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],\n",
    "                        'B': ['B4', 'B5', 'B6', 'B7'],\n",
    "                        'C': ['C4', 'C5', 'C6', 'C7'],\n",
    "                        'D': ['D4', 'D5', 'D6', 'D7']},\n",
    "                         index=[4, 5, 6, 7]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df3 = pd.DataFrame({'A': ['A8', 'A9', 'A10', 'A11'],\n",
    "                        'B': ['B8', 'B9', 'B10', 'B11'],\n",
    "                        'C': ['C8', 'C9', 'C10', 'C11'],\n",
    "                        'D': ['D8', 'D9', 'D10', 'D11']},\n",
    "                        index=[8, 9, 10, 11])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C   D\n",
       "0  A0  B0  C0  D0\n",
       "1  A1  B1  C1  D1\n",
       "2  A2  B2  C2  D2\n",
       "3  A3  B3  C3  D3"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>A5</td>\n",
       "      <td>B5</td>\n",
       "      <td>C5</td>\n",
       "      <td>D5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>A6</td>\n",
       "      <td>B6</td>\n",
       "      <td>C6</td>\n",
       "      <td>D6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>A7</td>\n",
       "      <td>B7</td>\n",
       "      <td>C7</td>\n",
       "      <td>D7</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B   C   D\n",
       "4  A4  B4  C4  D4\n",
       "5  A5  B5  C5  D5\n",
       "6  A6  B6  C6  D6\n",
       "7  A7  B7  C7  D7"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>A8</td>\n",
       "      <td>B8</td>\n",
       "      <td>C8</td>\n",
       "      <td>D8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>A9</td>\n",
       "      <td>B9</td>\n",
       "      <td>C9</td>\n",
       "      <td>D9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>A10</td>\n",
       "      <td>B10</td>\n",
       "      <td>C10</td>\n",
       "      <td>D10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>A11</td>\n",
       "      <td>B11</td>\n",
       "      <td>C11</td>\n",
       "      <td>D11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A    B    C    D\n",
       "8    A8   B8   C8   D8\n",
       "9    A9   B9   C9   D9\n",
       "10  A10  B10  C10  D10\n",
       "11  A11  B11  C11  D11"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Concatenation\n",
    "\n",
    "Concatenation basically glues together DataFrames. Keep in mind that dimensions should match along the axis you are concatenating on. You can use **pd.concat** and pass in a list of DataFrames to concatenate together:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>A5</td>\n",
       "      <td>B5</td>\n",
       "      <td>C5</td>\n",
       "      <td>D5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>A6</td>\n",
       "      <td>B6</td>\n",
       "      <td>C6</td>\n",
       "      <td>D6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>A7</td>\n",
       "      <td>B7</td>\n",
       "      <td>C7</td>\n",
       "      <td>D7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>A8</td>\n",
       "      <td>B8</td>\n",
       "      <td>C8</td>\n",
       "      <td>D8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>A9</td>\n",
       "      <td>B9</td>\n",
       "      <td>C9</td>\n",
       "      <td>D9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>A10</td>\n",
       "      <td>B10</td>\n",
       "      <td>C10</td>\n",
       "      <td>D10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>A11</td>\n",
       "      <td>B11</td>\n",
       "      <td>C11</td>\n",
       "      <td>D11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A    B    C    D\n",
       "0    A0   B0   C0   D0\n",
       "1    A1   B1   C1   D1\n",
       "2    A2   B2   C2   D2\n",
       "3    A3   B3   C3   D3\n",
       "4    A4   B4   C4   D4\n",
       "5    A5   B5   C5   D5\n",
       "6    A6   B6   C6   D6\n",
       "7    A7   B7   C7   D7\n",
       "8    A8   B8   C8   D8\n",
       "9    A9   B9   C9   D9\n",
       "10  A10  B10  C10  D10\n",
       "11  A11  B11  C11  D11"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2,df3])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A4</td>\n",
       "      <td>B4</td>\n",
       "      <td>C4</td>\n",
       "      <td>D4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A5</td>\n",
       "      <td>B5</td>\n",
       "      <td>C5</td>\n",
       "      <td>D5</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A6</td>\n",
       "      <td>B6</td>\n",
       "      <td>C6</td>\n",
       "      <td>D6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A7</td>\n",
       "      <td>B7</td>\n",
       "      <td>C7</td>\n",
       "      <td>D7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A8</td>\n",
       "      <td>B8</td>\n",
       "      <td>C8</td>\n",
       "      <td>D8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A9</td>\n",
       "      <td>B9</td>\n",
       "      <td>C9</td>\n",
       "      <td>D9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A10</td>\n",
       "      <td>B10</td>\n",
       "      <td>C10</td>\n",
       "      <td>D10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>A11</td>\n",
       "      <td>B11</td>\n",
       "      <td>C11</td>\n",
       "      <td>D11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A    B    C    D    A    B    C    D    A    B    C    D\n",
       "0    A0   B0   C0   D0  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN\n",
       "1    A1   B1   C1   D1  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN\n",
       "2    A2   B2   C2   D2  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN\n",
       "3    A3   B3   C3   D3  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN\n",
       "4   NaN  NaN  NaN  NaN   A4   B4   C4   D4  NaN  NaN  NaN  NaN\n",
       "5   NaN  NaN  NaN  NaN   A5   B5   C5   D5  NaN  NaN  NaN  NaN\n",
       "6   NaN  NaN  NaN  NaN   A6   B6   C6   D6  NaN  NaN  NaN  NaN\n",
       "7   NaN  NaN  NaN  NaN   A7   B7   C7   D7  NaN  NaN  NaN  NaN\n",
       "8   NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   A8   B8   C8   D8\n",
       "9   NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN   A9   B9   C9   D9\n",
       "10  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  A10  B10  C10  D10\n",
       "11  NaN  NaN  NaN  NaN  NaN  NaN  NaN  NaN  A11  B11  C11  D11"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat([df1,df2,df3],axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "_____\n",
    "## Example DataFrames"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n",
    "                     'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                     'B': ['B0', 'B1', 'B2', 'B3']})\n",
    "   \n",
    "right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n",
    "                          'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                          'D': ['D0', 'D1', 'D2', 'D3']})    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key\n",
       "0  A0  B0  K0\n",
       "1  A1  B1  K1\n",
       "2  A2  B2  K2\n",
       "3  A3  B3  K3"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>key</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "      <td>K0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "      <td>K1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "      <td>K2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "      <td>K3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    C   D key\n",
       "0  C0  D0  K0\n",
       "1  C1  D1  K1\n",
       "2  C2  D2  K2\n",
       "3  C3  D3  K3"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "right"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "___"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merging\n",
    "\n",
    "The **merge** function allows you to merge DataFrames together using a similar logic as merging SQL Tables together. For example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K1</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K3</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key   C   D\n",
       "0  A0  B0  K0  C0  D0\n",
       "1  A1  B1  K1  C1  D1\n",
       "2  A2  B2  K2  C2  D2\n",
       "3  A3  B3  K3  C3  D3"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left,right,how='inner',on='key')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Or to show a more complicated example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],\n",
    "                     'key2': ['K0', 'K1', 'K0', 'K1'],\n",
    "                        'A': ['A0', 'A1', 'A2', 'A3'],\n",
    "                        'B': ['B0', 'B1', 'B2', 'B3']})\n",
    "    \n",
    "right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
    "                               'key2': ['K0', 'K0', 'K0', 'K0'],\n",
    "                                  'C': ['C0', 'C1', 'C2', 'C3'],\n",
    "                                  'D': ['D0', 'D1', 'D2', 'D3']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key1 key2   C   D\n",
       "0  A0  B0   K0   K0  C0  D0\n",
       "1  A2  B2   K1   K0  C1  D1\n",
       "2  A2  B2   K1   K0  C2  D2"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>K2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B key1 key2    C    D\n",
       "0   A0   B0   K0   K0   C0   D0\n",
       "1   A1   B1   K0   K1  NaN  NaN\n",
       "2   A2   B2   K1   K0   C1   D1\n",
       "3   A2   B2   K1   K0   C2   D2\n",
       "4   A3   B3   K2   K1  NaN  NaN\n",
       "5  NaN  NaN   K2   K0   C3   D3"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='outer', on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>K2</td>\n",
       "      <td>K0</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A    B key1 key2   C   D\n",
       "0   A0   B0   K0   K0  C0  D0\n",
       "1   A2   B2   K1   K0  C1  D1\n",
       "2   A2   B2   K1   K0  C2  D2\n",
       "3  NaN  NaN   K2   K0  C3  D3"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='right', on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>key1</th>\n",
       "      <th>key2</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>K0</td>\n",
       "      <td>K0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>K0</td>\n",
       "      <td>K1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C1</td>\n",
       "      <td>D1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>K1</td>\n",
       "      <td>K0</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>A3</td>\n",
       "      <td>B3</td>\n",
       "      <td>K2</td>\n",
       "      <td>K1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    A   B key1 key2    C    D\n",
       "0  A0  B0   K0   K0   C0   D0\n",
       "1  A1  B1   K0   K1  NaN  NaN\n",
       "2  A2  B2   K1   K0   C1   D1\n",
       "3  A2  B2   K1   K0   C2   D2\n",
       "4  A3  B3   K2   K1  NaN  NaN"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.merge(left, right, how='left', on=['key1', 'key2'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Joining\n",
    "Joining is a convenient method for combining the columns of two potentially differently-indexed DataFrames into a single result DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
    "                     'B': ['B0', 'B1', 'B2']},\n",
    "                      index=['K0', 'K1', 'K2']) \n",
    "\n",
    "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n",
    "                    'D': ['D0', 'D2', 'D3']},\n",
    "                      index=['K0', 'K2', 'K3'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>K0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     A   B    C    D\n",
       "K0  A0  B0   C0   D0\n",
       "K1  A1  B1  NaN  NaN\n",
       "K2  A2  B2   C2   D2"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left.join(right)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>K0</th>\n",
       "      <td>A0</td>\n",
       "      <td>B0</td>\n",
       "      <td>C0</td>\n",
       "      <td>D0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K1</th>\n",
       "      <td>A1</td>\n",
       "      <td>B1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K2</th>\n",
       "      <td>A2</td>\n",
       "      <td>B2</td>\n",
       "      <td>C2</td>\n",
       "      <td>D2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>K3</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>C3</td>\n",
       "      <td>D3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      A    B    C    D\n",
       "K0   A0   B0   C0   D0\n",
       "K1   A1   B1  NaN  NaN\n",
       "K2   A2   B2   C2   D2\n",
       "K3  NaN  NaN   C3   D3"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "left.join(right, how='outer')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Great Job!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
