# Peek at the raw file: echo its first ten lines verbatim, then a closing marker.
with open( "data/user1.txt" ) as source:
    # zip() stops once range(10) is exhausted, or earlier if the file is shorter.
    for _, line in zip( range( 10 ), source ):
        print( line, end="" )
print( "... and so on." )

timestamp,date_id,dow,utc_time_sec,offset_time_sec,accuracy,longitude,latitude
2015-03-01 07:16:39,20150301,Sun,1425190599,-3600,10,-64.97920478,-15.68742782
2015-03-01 07:17:12,20150301,Sun,1425190632,-3600,29,-64.97919636,-15.68745956
2015-03-01 07:17:25,20150301,Sun,1425190645,-3600,33,-64.97922924,-15.68733408
2015-03-01 07:18:19,20150301,Sun,1425190699,-3600,25,-64.9791967,-15.68745926
2015-03-01 07:18:25,20150301,Sun,1425190705,-3600,17.6770000457764,-64.9791967,-15.68745926
2015-03-01 07:18:58,20150301,Sun,1425190738,-3600,36.6500015258789,-64.9792025,-15.68750054
2015-03-01 07:19:28,20150301,Sun,1425190768,-3600,69.0550003051758,-64.97919298,-15.68740562
2015-03-01 07:19:59,20150301,Sun,1425190799,-3600,25,-64.97919666,-15.68745918
2015-03-01 07:20:28,20150301,Sun,1425190828,-3600,66.8030014038086,-64.9791805,-15.68737422
... and so on.


import pandas as pd

# Load the readings of users 1-4 into a single DataFrame tagged with a "user" column.
# The per-user frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and it re-copied the whole
# accumulated frame on every call.
frames = []
for user_num in range(1, 5):
    dataframe = pd.read_csv('data/user%s.txt' % user_num, parse_dates=["timestamp"])
    # The user id is stored as a string in the first column.
    dataframe.insert(0, "user", str(user_num))
    frames.append(dataframe)
# No ignore_index: every per-user frame keeps its own index, exactly as the
# repeated append() calls did.
data = pd.concat(frames)
data


import plotly.express as px
from IPython.core.display import HTML

def caption( number, explanation ):
    """Display a centred HTML caption of the form "Figure <number>. <explanation>".

    number is formatted with %d (shown in bold together with the word "Figure");
    explanation is inserted into the HTML as-is, without escaping.
    """
    markup = '<div style="text-align:center"><b>Figure %d.</b> %s</div>' % ( number, explanation )
    display( HTML( markup ) )

# Plot every reading on one longitude/latitude scatter, coloured by the "user" column.
fig = px.scatter(
    data,
    x="longitude",
    y="latitude",
    color="user",
)
fig.show()
caption( 1, "Scatter plot of users 1-4 location readings." )


# One longitude/latitude density heatmap per user, with marginal histograms on both axes.
for user_number in range( 1, 5 ):
    user_id = str( user_number )

    # Keep only the rows of the current user (the "user" column holds strings).
    belongs_to_user = data[ "user" ] == user_id
    user_data = data[ belongs_to_user ]

    fig = px.density_heatmap(
        user_data,
        title="User %s Locations" % user_id,
        x="longitude",
        y="latitude",
        marginal_x="histogram",
        marginal_y="histogram",
        color_continuous_scale=[ "#000000", "#FF8C00" ],
    )
    fig.show()

    # Captions are numbered user_number + 1, i.e. 2 to 5.
    caption( user_number + 1, "Density Heatmap of User %d location readings." % user_number )


def _bounding_box( lat_min, lat_max, lon_min, lon_max ):
    """Return one location as nested min/max bounds for "lat" and "lon"."""
    return {
        "lat": { "min": lat_min, "max": lat_max },
        "lon": { "min": lon_min, "max": lon_max },
    }


# For convenience, we'll be keeping these values in memory for the next steps.
# Layout: users[<user id>]["locations"][<location id>] -> latitude/longitude bounds.
users = {
    "1": { "locations": {
        "1": _bounding_box( -15.30, -15.25, -64.76, -64.74 ),
        "2": _bounding_box( -15.70, -15.65, -64.98, -64.96 ),
    } },
    "2": { "locations": {
        "1": _bounding_box( -15.26, -15.24, -64.76, -64.74 ),
        "2": _bounding_box( -15.08, -15.06, -64.66, -64.64 ),
    } },
    "3": { "locations": {
        "1": _bounding_box( -15.26, -15.24, -64.76, -64.74 ),
        "2": _bounding_box( -15.46, -15.44, -64.74, -64.72 ),
    } },
    "4": { "locations": {
        "1": _bounding_box( -15.26, -15.24, -64.76, -64.74 ),
        "2": _bounding_box( -15.04, -15.02, -64.61, -64.60 ),
    } },
}


# For each user, count the readings inside each of their two bounding boxes per day of week.
for user_number in range( 1, 5 ):
    user_id = str( user_number )
    is_user = data[ "user" ] == user_id

    # Select the user's readings inside each box and tag them with a location label
    labelled_frames = []
    for location_key, label in ( ( "1", "Location 1" ), ( "2", "Location 2" ) ):
        box = users[ user_id ][ "locations" ][ location_key ]
        lat_ok = data[ "latitude" ].between( box[ "lat" ][ "min" ], box[ "lat" ][ "max" ] )
        lon_ok = data[ "longitude" ].between( box[ "lon" ][ "min" ], box[ "lon" ][ "max" ] )
        readings = data[ is_user & lat_ok & lon_ok ]
        readings.insert( 0, "location", label )
        labelled_frames.append( readings )

    # Join both locations into a single dataframe for the current user
    user_readings_locations = pd.concat( labelled_frames )

    # Show it, with the weekdays in calendar order on the x axis
    fig1 = px.histogram(
        user_readings_locations,
        title="User %s readings at locations 1 and 2, per Day of Week" % user_id,
        x="dow",
        color="location",
        category_orders={ "dow": [ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" ] },
    )
    fig1.show()
    caption( user_number + 5, "Histogram depicting the number of readings at locations 1 and 2 for User %d." % user_number )


# For each user, show how the working-day readings at the two labelled locations
# ("Workplace" = location 1, "Home" = location 2) are distributed over the hours of the day.
working_days = [ "Mon", "Tue", "Wed", "Thu", "Fri" ]

for user_number in range( 1, 5 ):
    user_id = str( user_number )

    # get the data
    loc1 = users[ user_id ]["locations"][ "1" ]
    loc2 = users[ user_id ]["locations"][ "2" ]
    is_user = data[ "user" ] == user_id
    in_loc1 = data[ 'latitude' ].between( loc1["lat"]["min"], loc1["lat"]["max"] ) & data[ 'longitude' ].between( loc1["lon"]["min"], loc1["lon"]["max"] )
    in_loc2 = data[ 'latitude' ].between( loc2["lat"]["min"], loc2["lat"]["max"] ) & data[ 'longitude' ].between( loc2["lon"]["min"], loc2["lon"]["max"] )
    user_readings_loc1 = data[ is_user & in_loc1 ]
    user_readings_loc2 = data[ is_user & in_loc2 ]

    # add the location label to the readings and join them into a single dataframe
    user_readings_loc1.insert( 0, "location", "Workplace" )
    user_readings_loc2.insert( 0, "location", "Home" )
    user_readings_locations = pd.concat( [ user_readings_loc1, user_readings_loc2 ] )

    # get only the readings for working days; .copy() makes the filtered frame
    # independent, so adding the "hour" column below is not an assignment on a
    # slice of another DataFrame (which raises SettingWithCopyWarning)
    is_working_day = user_readings_locations[ "dow" ].isin( working_days )
    user_readings_locations = user_readings_locations[ is_working_day ].copy()
    user_readings_locations["hour"] = user_readings_locations["timestamp"].dt.hour

    # show it
    fig = px.density_heatmap(user_readings_locations, title="User %s Readings, per Hour and Location" % user_id, x="hour", y="location", color_continuous_scale = ["#FFFFFF", "#006400"], height=300)
    fig.show()
    caption( user_number+9, "Heatmap illustrating the number of readings at locations Home and Workplace, for User %d." % user_number )

	Location 1		Location 2
	Latitude (range)	Longitude (range)	Latitude (range)	Longitude (range)
User 1	[-15.30, -15.25]	[-64.76, -64.74]	[-15.70, -15.65]	[-64.98, -64.96]
User 2	[-15.26, -15.24]	[-64.76, -64.74]	[-15.08, -15.06]	[-64.66, -64.64]
User 3	[-15.26, -15.24]	[-64.76, -64.74]	[-15.46, -15.44]	[-64.74, -64.72]
User 4	[-15.26, -15.24]	[-64.76, -64.74]	[-15.04, -15.02]	[-64.61, -64.60]

Step 1 - First Impressions

Notes

Step 2 - Loading the Data

Notes

Step 3 - Plotting Movement

Notes

Step 4 - Finding Important Locations

Notes

Step 5 - Adding Meaning to Locations

Notes

Step 6 - User Routines

Conclusions

	user	timestamp	date_id	dow	utc_time_sec	offset_time_sec	accuracy	longitude	latitude
0	1	2015-03-01 07:16:39	20150301	Sun	1425190599	-3600	10.000000	-64.979205	-15.687428
1	1	2015-03-01 07:17:12	20150301	Sun	1425190632	-3600	29.000000	-64.979196	-15.687460
2	1	2015-03-01 07:17:25	20150301	Sun	1425190645	-3600	33.000000	-64.979229	-15.687334
3	1	2015-03-01 07:18:19	20150301	Sun	1425190699	-3600	25.000000	-64.979197	-15.687459
4	1	2015-03-01 07:18:25	20150301	Sun	1425190705	-3600	17.677000	-64.979197	-15.687459
...	...	...	...	...	...	...	...	...	...
30603	4	2015-04-01 00:08:25	20150401	Wed	1427839705	-3600	776.625977	-64.609553	-15.031567
30604	4	2015-04-01 00:09:29	20150401	Wed	1427839769	-3600	872.018982	-64.609553	-15.031567
30605	4	2015-04-01 00:10:30	20150401	Wed	1427839830	-3600	963.453003	-64.609553	-15.031567
30606	4	2015-04-01 00:11:31	20150401	Wed	1427839891	-3600	1326.000000	-64.609553	-15.031567
30607	4	2015-04-01 00:12:38	20150401	Wed	1427839958	-3600	1326.000000	-64.609553	-15.031567