#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Pre-work for the INMAS Workshop on Data Science, 2021.
"""

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Read the salary table from disk into a pandas DataFrame.
df = pd.read_csv('Salaries.csv')

# Boolean mask over the rows of df: True where the 'discipline' column equals 'A'.
catA = df['discipline'] == 'A'

# Partition df with the mask. dfA receives the rows flagged True (discipline 'A');
# dfB receives every remaining row, i.e. any discipline value other than 'A'
# (presumably only 'B' in this dataset -- confirm against the CSV).
dfA = df.loc[catA]
dfB = df.loc[~catA]