Skip to content
Advertisement

Intersect two sf data.frames based on date and geometry using R

So, I have two R “sf” “data.frames”, one with millions of linestring geometries (vsr_segments: see below) and the other with 5 polygons (vsr_zones: see below). Each linestring has a datetime and each polygon has a unique date range.

I’m trying to intersect the linestrings dataframe with the polygon data.frame based on whether the linestring datetime falls within a specific polygon’s date range. Basically, if this linestring datetime is within any of the polygons’ date ranges, perform the intersect with the appropriate polygon, and return a sf data.frame of the linestrings that intersect.

I have a sql query that essentially does this but this only works on my postgres database.

source: https://postgis.net/2014/03/14/tip_intersection_faster/

I’m curious if there’ a better way to do this. It’s simple, but when new data comes in, I have to drop the table, create a new one, and create new indexes.

I’d rather have a way where I only run this datetime within date range spatial intersection with new data (sf data.frame with ~5000 linestrings) and append the resulting data.frame to the existing database table. Is there an r way to do this? I’ve tried sqldf to perform the query below with my r data.frames versus performing the query on my database. Any assistance would be very appreciated.

query = ("CREATE TABLE vsr_segments AS
            SELECT
            s.name, s.mmsi, s.speed,
            s.seg_mins, s.seg_km,
            s.seg_kmhr, s.seg_knots, s.speed_diff,
            s.year, s.beg_dt, s.end_dt,
            s.beg_lon, s.beg_lat,
            s.end_lon, s.end_lat, z.gid,
            CASE
            WHEN
            ST_CoveredBy(s.geometry, z.geom)
            THEN s.geometry
            ELSE
            ST_Multi(
            ST_Intersection(s.geometry, z.geom)
            ) END AS geometry
            FROM ais_segments AS s
            INNER JOIN vsr_zones AS z
            ON ST_Intersects(s.geometry, z.geom)
            WHERE
            s.datetime::date <= z.date_end AND
            s.datetime >= z.date_beg;")

    dbExecute(con, query)

sample data

vsr_segments <- structure(list(
datetime = structure(c(1573348510.52, 1573348830.935, 
1573349296.305, 1573349746.216, 1573349840.846, 1573350013.303, 
1573350371.104, 1573350793.237, 1573350929.837, 1573351206.262, 
1573351530.493, 1573351598.156, 1573351686.598, 1573353232.418, 
1573353368.013, 1573353476.023, 1573354582.045, 1573355374.706, 
1573355522.445, 1573355611.793), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), 
name = c("ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", "ALAN T", 
"ALAN T", "ALAN T"), 
geometry = structure(list(structure(c(-119.498252, 
-119.49837, 34.375007, 34.37505), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.49837, -119.498255, 34.37505, 
34.374992), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg"
)), structure(c(-119.498255, -119.498193, 34.374992, 34.374958
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498193, 
-119.498303, 34.374958, 34.375055), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498303, -119.498337, 
34.375055, 34.375078), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498337, -119.49841, 34.375078, 34.375062
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.49841, 
-119.498435, 34.375062, 34.375055), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498435, -119.498368, 
34.375055, 34.375092), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498368, -119.498357, 34.375092, 34.375058
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498357, 
-119.498292, 34.375058, 34.375048), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498325, -119.498342, 
34.375035, 34.375053), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498342, -119.498427, 34.375053, 34.375072
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498427, 
-119.49849, 34.375072, 34.375062), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498505, -119.498617, 
34.375062, 34.375048), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498617, -119.498602, 34.375037, 34.375027
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.498602, 
-119.498607, 34.375027, 34.375028), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498607, -119.498267, 
34.375028, 34.374993), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", 
"sfg")), structure(c(-119.498267, -119.4989, 34.374993, 34.374715
), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg")), structure(c(-119.4989, 
-119.498898, 34.374715, 34.374748), .Dim = c(2L, 2L), class = c("XY", 
"LINESTRING", "sfg")), structure(c(-119.498898, -119.4989, 34.374748, 
34.374723), .Dim = c(2L, 2L), class = c("XY", "LINESTRING", "sfg"
))), class = c("sfc_LINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = -119.4989, 
ymin = 34.374715, xmax = -119.498193, ymax = 34.375092), class = "bbox"), crs = structure(list(
    epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
20L), class = c("sf", "data.table", "data.frame"), sf_column = "geometry", agr = structure(c(datetime = NA_integer_, 
name = NA_integer_), .Label = c("constant", "aggregate", "identity"
), class = "factor"))


vsr_zones <- structure(list(
gid = c(5L, 1L, 2L, 3L, 4L), 
vsr_category = c("2019", 
"2017v1", "2017v2", "2017v3", "2018"), 
date_beg = structure(c(18031, 
17318, 17348, 17508, 17686), class = "Date"), 
date_end = structure(c(18215, 
17347, 17507, 17590, 17896), class = "Date"), 
date_range = structure(c("("2019-05-15 00:00:00+00","2019-11-15 23:59:59.99+00")", 
"("2017-07-01 00:00:00+00","2017-12-07 23:59:59.99+00")", 
"("2017-07-01 00:00:00+00","2017-12-07 23:59:59.99+00")", 
"("2017-12-08 00:00:00+00","2018-02-28 23:59:59.99+00")", 
"("2018-06-04 00:00:00+00","2018-12-31 23:59:59.99+00")"), class = "pq_tstzrange"), 
    rec_speed_knots = c(10L, 10L, 10L, 10L, 10L), 
geom = structure(list(
        structure(list(list(structure(c(-120.632908463866, -120.604354931796, 
        -120.58151210614, -120.55676571168, -120.530115748414, 
        -120.509176491563, -120.492044372321, -120.476815821884, 
        -120.465394409056, -120.429226601767, -120.383540950455, 
        -120.343566005557, -120.294073216636, -120.252194702933, 
        -120.223641170863, -120.179859088356, -120.126559161826, 
        -120.084680648123, -120.058030684857, -120.037091428006, 
        -120.021862877569, -120.006634327132, -119.95714153821, 
        -119.922877299726, -119.877191648414, -119.844830978735, 
        -119.825795290689, -119.791531052205, -119.709677593604, 
        -119.686834767948, -119.639245547831, -119.58975275891, 
        -119.56119922684, -119.53645283238, -119.483152905849, 
        -119.458406511388, -119.439370823342, -119.38987803442, 
        -119.372745915178, -119.30802457582, -119.290892456578, 
        -119.266146062117, -119.22236397961, -119.207135429173, 
        -119.121474832963, -119.091017732088, -119.037717805557, 
        -119.005357135878, -118.933021521301, -118.847360925091, 
        -118.799771704974, -118.784543154537, -118.735050365616, 
        -118.681750439085, -118.664618319843, -118.613221962117, 
        -118.544693485149, -118.491393558619, -118.451418613721, 
        -118.390504411972, -118.388600843167, -118.400022255995, 
        -118.424768650456, -118.405732962409, -118.36956515512, 
        -118.280097421301, -118.247736751622, -118.186822549873, 
        -118.186822549873, -118.165883293021, -118.110679797686, 
        -118.064994146374, -118.019308495062, -117.964104999727, 
        -117.906997935587, -117.85940871547, -117.808012357744, 
        -117.781362394479, -117.710930348707, -117.680473247832, 
        -117.638594734129, -117.587198376403, -117.571969825966, 
        -117.505344917803, -117.480260689924, -121.029936561572, 
        -121.029936561572, -120.646233445499, -120.632908463866, 
        34.5567050238998, 34.5528978862905, 34.5548014550951, 
        34.5395729046579, 34.5319586294392, 34.5205372166113, 
        34.4900801157366, 34.4786587029085, 34.44249089562, 34.448201602034, 
        34.4596230148619, 34.4577194460573, 34.4691408588853, 
        34.4615265836666, 34.4748515652994, 34.4672372900806, 
        34.4710444276899, 34.4577194460573, 34.4596230148619, 
        34.4596230148619, 34.4577194460573, 34.4596230148619, 
        34.4329730515967, 34.431069482792, 34.4082266571361, 
        34.4006123819174, 34.4120337947453, 34.4158409323548, 
        34.391094537894, 34.4082266571361, 34.41393736355, 34.4158409323548, 
        34.4101302259407, 34.3968052443081, 34.3777695562615, 
        34.3720588498474, 34.35683029941, 34.3168553545121, 34.3187589233168, 
        34.273073272005, 34.273073272005, 34.244519739935, 34.1436305932877, 
        34.1455341620923, 34.0979449419757, 34.0960413731712, 
        34.078909253929, 34.0655842722966, 34.0408378778358, 
        34.0313200338124, 33.9951522265239, 34.01799505218, 34.0313200338124, 
        34.0294164650079, 34.0370307402266, 34.0332236026171, 
        34.0370307402266, 34.0046700705472, 33.9589844192354, 
        33.8352524469322, 33.8143131900808, 33.8047953460574, 
        33.7781453827922, 33.7343633002849, 33.7362668690895, 
        33.7039061994104, 33.7457847131129, 33.7381704378942, 
        33.7591096947457, 33.7610132635502, 33.7438811443082, 
        33.711520474629, 33.6677383921216, 33.6258598784191, 
        33.6011134839584, 33.5839813647164, 33.5478135574277, 
        33.5382957134045, 33.4583458236086, 33.4583458236086, 
        33.4355029979527, 33.3783959338127, 33.3783959338127, 
        33.3365174201101, 33.299879212642, 33.299879212642, 34.5736940817683, 
        34.5738371431418, 34.5567050238998), .Dim = c(89L, 2L
        )))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
            list(structure(c(-120.873273035994, -120.873274461718, 
            -120.487219702881, -120.506822270857, -120.873273035994, 
            34.3781494612153, 34.4709485281576, 34.3814708197392, 
            34.2930682831847, 34.3781494612153), .Dim = c(5L, 
            2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), 
        structure(list(list(structure(c(-120.876185794554, -120.873274695607, 
        -119.449753967941, -119.469363746559, -120.876185794554, 
        34.3960669578761, 34.4861721639365, 34.1563446852463, 
        34.0713786726887, 34.3960669578761), .Dim = c(5L, 2L)))), class = c("XY", 
        "MULTIPOLYGON", "sfg")), structure(list(list(structure(c(-120.050247166472, 
        -120.027227238885, -119.26260029791, -119.285620225496, 
        -120.050247166472, 34.1816435818833, 34.2854817798431, 
        34.1159713573041, 34.0121331593446, 34.1816435818833), .Dim = c(5L, 
        2L)))), class = c("XY", "MULTIPOLYGON", "sfg")), structure(list(
            list(structure(c(-118.986389683899, -118.994289494438, 
            -119.276683926611, -119.874404531275, -120.177071971217, 
            -120.709119452121, -120.873778153724, -120.873274108521, 
            -120.672951644832, -120.507341158826, -120.343634241625, 
            -120.195155874861, -119.90105449454, -119.744010068156, 
            -119.586013857368, -119.427065862179, -119.249082178943, 
            -118.974016486669, -118.986389683899, 33.9223965975751, 
            33.9007910916422, 34.0337553726479, 34.1698605421817, 
            34.2383890191496, 34.3592656382457, 34.397337014339, 
            34.4479592752089, 34.403047720753, 34.3649763446597, 
            34.3278567529687, 34.2935925144848, 34.2279193907238, 
            34.1917515834352, 34.1555837761465, 34.1203677532603, 
            34.0794410239599, 33.9471429920358, 33.9223965975751
            ), .Dim = c(19L, 2L)))), class = c("XY", "MULTIPOLYGON", 
        "sfg"))), n_empty = 0L, class = c("sfc_MULTIPOLYGON", 
    "sfc"), precision = 0, bbox = structure(c(xmin = -121.029936561572, 
    ymin = 33.299879212642, xmax = -117.480260689924, ymax = 34.5738371431418
    ), class = "bbox"), crs = structure(list(epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), class = "crs"))), row.names = c(NA, 
-5L), class = c("sf", "data.frame"), sf_column = "geom", agr = structure(c(gid = NA_integer_, 
vsr_category = NA_integer_, date_beg = NA_integer_, date_end = NA_integer_, 
date_range = NA_integer_, rec_speed_knots = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")))

Advertisement

Answer

A two-step approach could be to do a range join (or non-equi join) to find which rows of data overlap by date range, then a pair-wise geometry intersection to tell you whether they intersect spatially or not

I like using library(data.table) for all joining operations because of its superior memory management and speed

library(sf)
library(data.table)

setDT( vsr_segments )
setDT( vsr_zones )

## add a row_index onto segments
## (I'm assuming gid is a unique id on vsr_zones)
vsr_segments[, row_id := .I ]

## make a POSIXct column so we can do a range join (on the same data)
vsr_zones[
  , `:=`(
    posix_beg = as.POSIXct( date_beg )   ## set whichever timezone is appropriate
    , posix_end = as.POSIXct( date_end )
  )
]

## use a range join (or non-equi join) to give you the overlapping geometries in time
dt <- vsr_zones[
  vsr_segments
  , on = .(posix_beg <= datetime, posix_end >= datetime)
] 

dt[, int := list( sf::st_intersection( geom, geometry )), by = .(row_id)]

## now the 'int' column is the geometry created by the intersection of geom and geometry
User contributions licensed under: CC BY-SA
10 People found this is helpful
Advertisement