#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Pre-Work of Workshop on Data Science of the Inmas, 2021.

Loads ``Salaries.csv`` into a pandas DataFrame and splits its rows into two
DataFrames according to the value of the ``discipline`` column.
"""
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

# Load the data into a pandas DataFrame. The path is relative, so it is
# resolved against the current working directory.
df = pd.read_csv('Salaries.csv')

# Boolean mask with one entry per row: True where discipline == 'A'.
catA = df['discipline'] == 'A'

# Rows of "df" whose discipline value is 'A'.
dfA = df.loc[catA]

# All remaining rows of "df", i.e. those whose discipline is NOT 'A'.
# NOTE(review): this equals "discipline == 'B'" only if the column holds
# nothing but 'A' and 'B' (no other labels, no missing values) -- confirm
# against the data.
# "~" negates the boolean mask element-wise; it replaces the original
# "catA == False" comparison and selects exactly the same rows.
dfB = df.loc[~catA]