[R] Help with looping a function over a list of dataframes:

Kathan Desai kde@@|1 @end|ng |rom @he|||e|d@@c@uk
Fri Jul 3 22:56:32 CEST 2020


I have been trying to run a for loop for a function that compares dataframe n
with dataframe n-1, across a list of dataframes. It does this by checking
each midpoint of dataframe n against each midpoint of dataframe n-1. This is
done to make up for any disparity in the number of rows. The idea of this code
is to identify any objects that are stationary and assign them an ID of 1, while
the dynamic objects are assigned an ID of 0 (examples can be found below).


*This is what I have so far:*
# Intended behaviour: for each data frame in list_df, add an ID column that is
# 1 for every midpoint lying within 1 unit of any midpoint in the previous
# data frame, and 0 otherwise.
# NOTE(review): `list_df$position_tab_` is not the name of any element shown
# in the dput() output (those are position_tab_1, position_tab_2, ...). With
# an ambiguous prefix, `$` presumably yields NULL, so the real data frames
# would never be read or modified and no error would be raised -- TODO
# confirm. Positional indexing (list_df[[i]] and list_df[[i - 1]]) looks like
# what was meant.
# NOTE(review): seq_along() starts at 1, so the first iteration refers to
# element i - 1 = 0. The first data frame has no predecessor, so the loop
# probably needs to start at 2.
for(i in seq_along(list_df)){
   list_df$position_tab_[[i]]$ID <-
     unlist(lapply(list_df$position_tab_[[i]]$midpoint, function(x)
                 ifelse(any(abs(x - list_df$position_tab_[[i-1]]$midpoint)
<= 1),1,0)
            ))
}

No error message is produced, so there is nothing obvious to debug. I am
quite new to R programming in general, so please excuse any silly mistakes I
may have made. The loop does not seem to be adding the ID columns or
comparing the data as it should.

My list of dataframes contains dataframes named position_tab_1,
position_tab_2, ..., position_tab_121. Each position_tab represents a
timepoint, so in total there are 121 timepoints (frames). I need the loop
to run so that position_tab_2 is compared to position_tab_1, and so on all
the way to position_tab_121 being compared to position_tab_120.

The function adds a column named "ID" to each of these dataframes as it
compares it to the dataframe before it, so all dataframes apart from
position_tab_1 (which has nothing to compare to) should have this column added.


*Some of my data (first 10 dataframes in list):*
> dput(list_df[1:10])
list(position_tab_1 = structure(list(Object = c(2666L, 2668L,
2671L, 2674L, 2676L, 2677L, 2678L, 2679L, 2680L, 2682L, 2683L,
2684L, 2685L, 2686L, 2687L, 2689L, 2692L, 2693L, 2694L, 2695L,
2696L), minimum = c(4L, 39L, 147L, 224L, 419L, 531L, 595L, 641L,
669L, 723L, 810L, 836L, 907L, 978L, 1061L, 1129L, 1290L, 1519L,
1749L, 1843L, 1897L), maximum = c(22L, 85L, 173L, 242L, 449L,
587L, 627L, 655L, 702L, 740L, 828L, 890L, 923L, 1024L, 1086L,
1144L, 1302L, 1544L, 1780L, 1870L, 1925L), midpoint = c(13, 62,
160, 233, 434, 559, 611, 648, 685.5, 731.5, 819, 863, 915, 1001,
1073.5, 1136.5, 1296, 1531.5, 1764.5, 1856.5, 1911)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_2 =
structure(list(
    Object = c(2645L, 2646L, 2650L, 2652L, 2655L, 2656L, 2657L,
    2658L, 2659L, 2661L, 2662L, 2663L, 2664L, 2665L, 2667L, 2670L,
    2675L, 2681L, 2688L, 2690L, 2691L), minimum = c(4L, 40L,
    147L, 224L, 415L, 532L, 595L, 641L, 670L, 722L, 811L, 835L,
    907L, 978L, 1061L, 1128L, 1289L, 1520L, 1748L, 1843L, 1897L
    ), maximum = c(22L, 85L, 173L, 242L, 445L, 588L, 627L, 655L,
    702L, 739L, 828L, 891L, 923L, 1022L, 1085L, 1143L, 1302L,
    1544L, 1779L, 1870L, 1925L), midpoint = c(13, 62.5, 160,
    233, 430, 560, 611, 648, 686, 730.5, 819.5, 863, 915, 1000,
    1073, 1135.5, 1295.5, 1532, 1763.5, 1856.5, 1911)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_3 =
structure(list(
    Object = c(2623L, 2624L, 2627L, 2631L, 2633L, 2635L, 2636L,
    2637L, 2638L, 2640L, 2641L, 2642L, 2643L, 2644L, 2647L, 2649L,
    2654L, 2660L, 2669L, 2672L, 2673L), minimum = c(3L, 39L,
    149L, 223L, 402L, 539L, 594L, 639L, 669L, 722L, 811L, 834L,
    907L, 979L, 1060L, 1129L, 1289L, 1520L, 1749L, 1842L, 1897L
    ), maximum = c(22L, 86L, 175L, 241L, 431L, 587L, 627L, 653L,
    700L, 738L, 828L, 894L, 925L, 1021L, 1084L, 1144L, 1302L,
    1544L, 1779L, 1869L, 1925L), midpoint = c(12.5, 62.5, 162,
    232, 416.5, 563, 610.5, 646, 684.5, 730, 819.5, 864, 916,
    1000, 1072, 1136.5, 1295.5, 1532, 1764, 1855.5, 1911)), row.names =
c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_4 =
structure(list(
    Object = c(2600L, 2604L, 2606L, 2609L, 2611L, 2613L, 2614L,
    2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L, 2626L, 2628L,
    2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L, 42L,
    142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L,
    908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L
    ), maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L,
    701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L,
    1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157,
    232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999,
    1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame")), position_tab_5 =
structure(list(
    Object = c(2580L, 2581L, 2585L, 2586L, 2589L, 2590L, 2592L,
    2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L, 2601L, 2603L,
    2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L), minimum = c(3L,
    43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L, 808L, 836L,
    861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L, 1748L,
    1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L, 419L,
    629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L, 1050L,
    1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L),
    midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5,
    818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5,
    1531.5, 1764, 1855.5, 1909, 2148)), row.names = c(NA, -23L
), class = c("tbl_df", "tbl", "data.frame")), position_tab_6 =
structure(list(
    Object = c(2555L, 2559L, 2562L, 2563L, 2564L, 2567L, 2569L,
    2570L, 2571L, 2572L, 2573L, 2574L, 2575L, 2576L, 2577L, 2579L,
    2583L, 2587L, 2591L, 2602L, 2607L, 2608L, 2612L), minimum = c(4L,
    45L, 123L, 154L, 224L, 390L, 546L, 600L, 643L, 669L, 720L,
    804L, 836L, 908L, 967L, 1058L, 1129L, 1289L, 1519L, 1748L,
    1843L, 1893L, 2147L), maximum = c(23L, 86L, 150L, 171L, 241L,
    419L, 589L, 636L, 657L, 701L, 738L, 827L, 879L, 925L, 1011L,
    1084L, 1144L, 1301L, 1543L, 1780L, 1871L, 1924L, 2148L),
    midpoint = c(13.5, 65.5, 136.5, 162.5, 232.5, 404.5, 567.5,
    618, 650, 685, 729, 815.5, 857.5, 916.5, 989, 1071, 1136.5,
    1295, 1531, 1764, 1857, 1908.5, 2147.5)), row.names = c(NA,
-23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_7 =
structure(list(
    Object = c(2537L, 2539L, 2540L, 2541L, 2542L, 2544L, 2546L,
    2547L, 2548L, 2549L, 2550L, 2551L, 2552L, 2554L, 2556L, 2558L,
    2560L, 2565L, 2568L, 2578L, 2582L, 2584L, 2588L), minimum = c(3L,
    45L, 122L, 156L, 224L, 387L, 546L, 601L, 669L, 719L, 803L,
    837L, 908L, 959L, 1059L, 1096L, 1128L, 1289L, 1519L, 1748L,
    1844L, 1892L, 2147L), maximum = c(22L, 86L, 147L, 172L, 241L,
    415L, 590L, 656L, 699L, 738L, 830L, 871L, 924L, 1014L, 1082L,
    1119L, 1144L, 1301L, 1543L, 1781L, 1872L, 1925L, 2148L),
    midpoint = c(12.5, 65.5, 134.5, 164, 232.5, 401, 568, 628.5,
    684, 728.5, 816.5, 854, 916, 986.5, 1070.5, 1107.5, 1136,
    1295, 1531, 1764.5, 1858, 1908.5, 2147.5)), row.names = c(NA,
-23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_8 =
structure(list(
    Object = c(2514L, 2517L, 2519L, 2520L, 2521L, 2523L, 2525L,
    2526L, 2527L, 2528L, 2529L, 2530L, 2531L, 2532L, 2533L, 2534L,
    2536L, 2543L, 2545L, 2553L, 2557L, 2561L, 2566L), minimum = c(5L,
    44L, 121L, 153L, 224L, 380L, 546L, 603L, 668L, 721L, 802L,
    841L, 907L, 960L, 1006L, 1060L, 1106L, 1288L, 1518L, 1748L,
    1843L, 1893L, 2148L), maximum = c(23L, 86L, 146L, 170L, 242L,
    409L, 588L, 655L, 699L, 738L, 830L, 872L, 924L, 994L, 1029L,
    1084L, 1143L, 1302L, 1543L, 1781L, 1870L, 1925L, 2148L),
    midpoint = c(14, 65, 133.5, 161.5, 233, 394.5, 567, 629,
    683.5, 729.5, 816, 856.5, 915.5, 977, 1017.5, 1072, 1124.5,
    1295, 1530.5, 1764.5, 1856.5, 1909, 2148)), row.names = c(NA,
-23L), class = c("tbl_df", "tbl", "data.frame")), position_tab_9 =
structure(list(
    Object = c(2492L, 2493L, 2497L, 2498L, 2499L, 2501L, 2503L,
    2504L, 2505L, 2506L, 2507L, 2508L, 2509L, 2510L, 2511L, 2513L,
    2516L, 2522L, 2524L, 2532L, 2535L, 2538L), minimum = c(6L,
    44L, 111L, 149L, 224L, 375L, 548L, 596L, 668L, 722L, 800L,
    840L, 908L, 960L, 1005L, 1058L, 1127L, 1289L, 1519L, 1748L,
    1842L, 1891L), maximum = c(24L, 81L, 137L, 167L, 242L, 403L,
    589L, 656L, 699L, 738L, 828L, 872L, 925L, 994L, 1028L, 1081L,
    1149L, 1302L, 1544L, 1780L, 1868L, 1924L), midpoint = c(15,
    62.5, 124, 158, 233, 389, 568.5, 626, 683.5, 730, 814, 856,
    916.5, 977, 1016.5, 1069.5, 1138, 1295.5, 1531.5, 1764, 1855,
    1907.5)), row.names = c(NA, -22L), class = c("tbl_df", "tbl",
"data.frame")), position_tab_10 = structure(list(Object = c(2469L,
2471L, 2474L, 2475L, 2476L, 2478L, 2481L, 2482L, 2483L, 2484L,
2485L, 2486L, 2487L, 2488L, 2489L, 2491L, 2495L, 2500L, 2502L,
2512L, 2515L, 2518L), minimum = c(6L, 38L, 109L, 147L, 223L,
363L, 548L, 597L, 668L, 719L, 803L, 839L, 908L, 958L, 1004L,
1058L, 1126L, 1288L, 1519L, 1746L, 1841L, 1892L), maximum = c(24L,
76L, 134L, 165L, 240L, 394L, 591L, 656L, 698L, 737L, 829L, 869L,
924L, 996L, 1027L, 1081L, 1147L, 1301L, 1543L, 1781L, 1868L,
1925L), midpoint = c(15, 57, 121.5, 156, 231.5, 378.5, 569.5,
626.5, 683, 728, 816, 854, 916, 977, 1015.5, 1069.5, 1136.5,
1294.5, 1531, 1763.5, 1854.5, 1908.5)), row.names = c(NA, -22L
), class = c("tbl_df", "tbl", "data.frame")))

*What is produced when running the base code without any loops:*

This is the base code without any attempt to loop it; below is what
it produces when it is used with dataframes 4 and 5:

#the code:
# Single-pair version: for each midpoint in position_tab_5, the result is 1
# when at least one midpoint in position_tab_4 differs from it by at most 1
# (abs difference <= 1), otherwise 0. unlist() flattens the per-midpoint
# results into a vector that is stored as the ID column of position_tab_5.
list_df$position_tab_5$ID <- unlist(lapply(list_df$position_tab_5$midpoint,
function(x) ifelse(any(abs(x - list_df$position_tab_4$midpoint) <= 1),1,0)))

##position_tab_5 after the manipulation has occurred:
structure(list(Object = c(2580L, 2581L, 2585L, 2586L, 2589L,
2590L, 2592L, 2593L, 2594L, 2595L, 2596L, 2597L, 2598L, 2599L,
2601L, 2603L, 2605L, 2610L, 2616L, 2625L, 2629L, 2630L, 2634L
), minimum = c(3L, 43L, 132L, 223L, 391L, 547L, 643L, 669L, 721L,
808L, 836L, 861L, 908L, 978L, 1028L, 1057L, 1127L, 1288L, 1519L,
1748L, 1842L, 1893L, 2148L), maximum = c(21L, 85L, 168L, 241L,
419L, 629L, 656L, 701L, 738L, 828L, 858L, 890L, 925L, 1013L,
1050L, 1083L, 1143L, 1301L, 1544L, 1780L, 1869L, 1925L, 2148L
), midpoint = c(12, 64, 150, 232, 405, 588, 649.5, 685, 729.5,
818, 847, 875.5, 916.5, 995.5, 1039, 1070, 1135, 1294.5, 1531.5,
1764, 1855.5, 1909, 2148), ID = c(1, 1, 0, 1, 0, 0, 0, 1, 1,
1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0)), row.names = c(NA,
-23L), class = c("tbl_df", "tbl", "data.frame"))

#position_tab_4 (the DF pos_tab_5 is being compared to)
structure(list(Object = c(2600L, 2604L, 2606L, 2609L, 2611L,
2613L, 2614L, 2615L, 2617L, 2618L, 2619L, 2620L, 2621L, 2622L,
2626L, 2628L, 2632L, 2639L, 2648L, 2651L, 2653L), minimum = c(4L,
42L, 142L, 223L, 393L, 547L, 595L, 641L, 669L, 720L, 809L, 835L,
908L, 979L, 1059L, 1127L, 1289L, 1519L, 1749L, 1841L, 1897L),
    maximum = c(22L, 85L, 172L, 241L, 421L, 587L, 629L, 655L,
    701L, 738L, 826L, 890L, 925L, 1019L, 1084L, 1143L, 1301L,
    1544L, 1780L, 1868L, 1925L), midpoint = c(13, 63.5, 157,
    232, 407, 567, 612, 648, 685, 729, 817.5, 862.5, 916.5, 999,
    1071.5, 1135, 1295, 1531.5, 1764.5, 1854.5, 1911)), row.names = c(NA,
-21L), class = c("tbl_df", "tbl", "data.frame"))

*I appreciate any help anyone can provide!*

	[[alternative HTML version deleted]]



More information about the R-help mailing list