Column Operations¶

Here you will see a detailed overview of all the column operations available in Optimus. You can access the operation via df.cols

Let’s create a sample dataframe to start working.

# Import Optimus
from optimus import Optimus
# Create Optimus instance
op = Optimus()

from pyspark.sql.types import StructType, StructField, StringType, BooleanType, IntegerType, ArrayType

df = op.create.df(
            [
                ("words", "str", True),
                ("num", "int", True),
                ("animals", "str", True),
                ("thing", StringType(), True),
                ("two strings", StringType(), True),
                ("filter", StringType(), True),
                ("num 2", "string", True),
                ("col_array",  ArrayType(StringType()), True),
                ("col_int",  ArrayType(IntegerType()), True)

            ]
,
[
                ("  I like     fish  ", 1, "dog", "housé", "cat-car", "a","1",["baby", "sorry"],[1,2,3]),
                ("    zombies", 2, "cat", "tv", "dog-tv", "b","2",["baby 1", "sorry 1"],[3,4]),
                ("simpsons   cat lady", 2, "frog", "table","eagle-tv-plus","1","3", ["baby 2", "sorry 2"], [5,6,7]),
                (None, 3, "eagle", "glass", "lion-pc", "c","4", ["baby 3", "sorry 3"] ,[7,8])
            ])

To see the dataframe we will use the table() function, a much better way to see your results, instead of the built-in `show() function.

df.table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]

cols.append(col_name=None, value=None)¶

Appends a column to a Dataframe

df = df.cols.append("new_col_1", 1)
df.table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

from pyspark.sql.functions import *

df.cols.append([
    ("new_col_2", 2.22),
    ("new_col_3", lit(3))
    ]).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1	new_col_2	new_col_3
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1	2.22	3
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1	2.22	3
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1	2.22	3
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1	2.22	3

df.cols.append([
("new_col_4", "test"),
("new_col_5", df['num']*2),
("new_col_6", [1,2,3])
]).table()

words

num

animals

thing

two strings

filter

num 2

col_array

col_int

new_col_1

new_col_4

new_col_5

new_col_6

I like fish

1

dog

housé

cat-car

a

1

[baby, sorry]

[1, 2, 3]

1

test

2

[1, 2, 3]

zombies

2

cat

tv

dog-tv

b

2

[baby 1, sorry 1]

[3, 4]

1

test

4

[1, 2, 3]

simpsons cat lady

2

frog

table

eagle-tv-plus

1

3

[baby 2, sorry 2]

[5, 6, 7]

1

test

4

[1, 2, 3]

null

3

eagle

glass

lion-pc

c

4

[baby 3, sorry 3]

[7, 8]

1

test

6

[1, 2, 3]

cols.select(columns=None, regex=None, data_type=None)¶

Select columns using index, column name, regex or data type

columns = ["words", 1, "animals", 3]
df.cols.select(columns).table()

words	num	animals	thing
I like fish	1	dog	housé
zombies	2	cat	tv
simpsons cat lady	2	frog	table
null	3	eagle	glass

df.cols.select("n.*", regex = True).show()

num	num 2	new_col_1
1	1	1
2	2	1
2	3	1
3	4	1

df.cols.select("*", data_type = "str").table()

thing	words	animals	filter	two strings	num 2
housé	I like fish	dog	a	cat-car	1
tv	zombies	cat	b	dog-tv	2
table	simpsons cat lady	frog	1	eagle-tv-plus	3
glass	null	eagle	c	lion-pc	4

cols.rename(columns_old_new=None, func=None)¶

Changes the name of a column(s) dataFrame.

df.cols.rename('num','number').table()

words	number	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

df.cols.rename([('num','number'),("animals","gods")], str.upper).table()

WORDS	NUM	ANIMALS	THING	TWO STRINGS	FILTER	NUM 2	COL_ARRAY	COL_INT	NEW_COL_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

df.cols.rename(str.lower).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

cols.cast()¶

Cast multiple columns to a specific datatype.

List of tuples of column names and types to be casted. This variable should have the following structure:

colsAndTypes = [(‘columnName1’, ‘integer’), (‘columnName2’, ‘float’), (‘columnName3’, ‘string’)]

The first parameter in each tuple is the column name, the second is the final datatype of column after the transformation is made.

df.cols.cast([("num", "string"),("num 2", "integer")]).dtypes

 [('words', 'string'),
 ('num', 'string'),
 ('animals', 'string'),
 ('thing', 'string'),
 ('two strings', 'string'),
 ('filter', 'string'),
 ('num 2', 'int'),
 ('col_array', 'array<string>'),
 ('col_int', 'array<int>'),
 ('new_col_1', 'int')]

You can cast all columns to a specific type too.

df.cols.cast("*", "string").dtypes

[('words', 'string'),
 ('num', 'string'),
 ('animals', 'string'),
 ('thing', 'string'),
 ('two strings', 'string'),
 ('filter', 'string'),
 ('num 2', 'string'),
 ('col_array', 'string'),
 ('col_int', 'string'),
 ('new_col_1', 'string')]

cols.keep(columns=None, regex=None)¶

Only keep the columns specified.

df.cols.keep("num").table()

num

1

2

3

cols.move(column, position, ref_col)¶

Move a column to specific position

df.cols.move("words", "after", "thing").table()

num	animals	thing	words	two strings	filter	num 2	col_array	col_int	new_col_1
1	dog	housé	I like fish	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
2	cat	tv	zombies	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
2	frog	table	simpsons cat lady	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
3	eagle	glass	null	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

cols.sort(order=”asc”)¶

Sort dataframes columns asc or desc

df.cols.sort().table()

animals	col_array	col_int	filter	new_col_1	num	num 2	thing	two strings	words
dog	[baby, sorry]	[1, 2, 3]	a	1	1	1	housé	cat-car	I like fish
cat	[baby 1, sorry 1]	[3, 4]	b	1	2	2	tv	dog-tv	zombies
frog	[baby 2, sorry 2]	[5, 6, 7]	1	1	2	3	table	eagle-tv-plus	simpsons cat lady
eagle	[baby 3, sorry 3]	[7, 8]	c	1	3	4	glass	lion-pc	null

df.cols.sort(order = "desc").table()

words	two strings	thing	num 2	num	new_col_1	filter	col_int	col_array	animals
I like fish	cat-car	housé	1	1	1	a	[1, 2, 3]	[baby, sorry]	dog
zombies	dog-tv	tv	2	2	1	b	[3, 4]	[baby 1, sorry 1]	cat
simpsons cat lady	eagle-tv-plus	table	3	2	1	1	[5, 6, 7]	[baby 2, sorry 2]	frog
null	lion-pc	glass	4	3	1	c	[7, 8]	[baby 3, sorry 3]	eagle

cols.drop()¶

Drops a list of columns

df2 = df.cols.drop("num")
df2.table()

words	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

df2 = df.cols.drop(["num","words"])
df2.table()

animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

Chaining¶

The past transformations were done step by step, but this can be achieved by chaining all operations into one line of code, like the cell below. This way is much more efficient and scalable because it uses all optimization issues from the lazy evaluation approach.

df\
.cols.rename([('num','number')])\
.cols.drop(["number","words"])\
.withColumn("new_col_2", lit("spongebob"))\
.cols.append("new_col_1", 1)\
.cols.sort(order= "desc")\
.rows.drop(df["num 2"] == 3)\
.table()

two strings	thing	num 2	new_col_2	new_col_1	filter	col_int	col_array	animals
cat-car	housé	1	spongebob	1	a	[1, 2, 3]	[baby, sorry]	dog
dog-tv	tv	2	spongebob	1	b	[3, 4]	[baby 1, sorry 1]	cat
lion-pc	glass	4	spongebob	1	c	[7, 8]	[baby 3, sorry 3]	eagle

cols.unnest(columns, mark=None, n=None, index=None)¶

Split array or string in different columns

df.cols.unnest("two strings","-").table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	two strings_0	two strings_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	cat	car
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	dog	tv
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	eagle	tv
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	lion	pc

Only getting the first element

df.cols.unnest("two strings","-", index = 1).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	two strings_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	car
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	tv
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	tv
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	pc

Unnest array of string

df.cols.unnest(["col_array"]).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	col_array_0	col_array_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	baby	sorry
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	baby 1	sorry 1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	baby 2	sorry 2
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	baby 3	sorry 3

Split in 3 parts

df \
.cols.unnest(["two strings"], n= 3, mark = "-") \
.table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	two strings_0	two strings_1	two strings_2
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	cat	car	null
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	dog	tv	null
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	eagle	tv	plus
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	lion	pc	null

cols.impute(input_cols, output_cols, strategy=”mean”)¶

Imputes missing data from specified columns using the mean or median.

# Create test dataset
df_fill = op.spark.createDataFrame([(1.0, float("nan")), (2.0, float("nan")),
                           (float("nan"), 3.0), (4.0, 4.0), (5.0, 5.0)], ["a", "b"])

df_fill.cols.impute(["a", "b"], ["out_a", "out_b"], "median").table()

a	b	out_a	out_b
1.0	NaN	1.0	4.0
2.0	NaN	2.0	4.0
NaN	3.0	2.0	3.0
4.0	4.0	4.0	4.0
5.0	5.0	5.0	5.0

cols.select_by_dtypes(data_type)¶

Returns one or multiple dataframe columns which match with the data type provided.

df.cols.select_by_dtypes("int").table()

num

1

2

3

cols.apply_by_dtypes(columns, func, func_return_type, args=None, func_type=None, data_type=None)¶

Apply a function using pandas udf or udf if apache arrow is not available.

In the next example we replace a number in a string column with “new string”:

def func(val, attr):
    return attr

df.cols.apply_by_dtypes("filter", func, "string", "new string", data_type="integer").table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]
simpsons cat lady	2	frog	table	eagle-tv-plus	new string	3	[baby 2, sorry 2]	[5, 6, 7]
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]

User Define Functions in Optimus¶

Now we’ll create a UDF function that sum a values (32 in this case) to two columns

df = df.cols.append("new_col_1", 1)

def func(val, attr):
    return val + attr

df.cols.apply(["num", "new_col_1"], func, "int", 32 ,"udf").table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	33	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	33
zombies	34	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	33
simpsons cat lady	34	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	33
null	35	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	33

Now a we’ll create a Pandas UDF function that sum a values (10 in this case) to two columns

def func(val, attr):
    return val + attr

df.cols.apply(["num", "new_col_1"], func, "int", 10).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	11	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	11
zombies	12	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	11
simpsons cat lady	12	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	11
null	13	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	11

Create an abstract udf to filter a rows where the value of column “num”> 1

from optimus.functions import abstract_udf as audf

def func(val, attr):
    return val>1

df.rows.select(audf("num", func, "boolean")).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

Create an abstract udf (Pandas UDF) to pass two arguments to a function a apply a sum operation

from optimus.functions import abstract_udf as audf

def func(val, attr):
    return val+attr[0]+ attr[1]

df.withColumn("num_sum", audf ("num", func, "int", [10,20])).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1	num_sum
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1	31
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1	32
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1	32
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1	33

cols.apply_expr(columns, func=None, args=None, filter_col_by_dtypes=None, verbose=True)¶

Apply a expression to column.

Here we’ll apply a column expression to when the value of “num” or “num 2” is grater than 2:

from pyspark.sql import functions as F
def func(col_name, attr):
    return F.when(F.col(col_name)>2 ,10).otherwise(1)

df.cols.apply_expr(["num","num 2"], func).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	1	cat	tv	dog-tv	b	1	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	1	frog	table	eagle-tv-plus	1	10	[baby 2, sorry 2]	[5, 6, 7]	1
null	10	eagle	glass	lion-pc	c	10	[baby 3, sorry 3]	[7, 8]	1

Convert to uppercase:

from pyspark.sql import functions as F
def func(col_name, attr):
    return F.upper(F.col(col_name))

df.cols.apply_expr(["two strings","animals"], func).table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	DOG	housé	CAT-CAR	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	CAT	tv	DOG-TV	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	FROG	table	EAGLE-TV-PLUS	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	EAGLE	glass	LION-PC	c	4	[baby 3, sorry 3]	[7, 8]	1

cols.count_na(columns)¶

Returns the NAN and Null count in a Column.

import numpy as np

df_null = op.spark.createDataFrame(
    [(1, 1, None), (1, 2, float(5)), (1, 3, np.nan), (1, 4, None), (1, 5, float(10)), (1, 6, float('nan')), (1, 6, float('nan'))],
    ('session', "timestamp1", "id2"))

df_null.cols.count_na("*")

Out -> {'session': 0, 'timestamp1': 0, 'id2': 5}

cols.count_uniques(columns, estimate=True)¶

Returns how many unique items exist in a columns

df.cols.count_uniques("*")

And you’ll get:

{'words': {'approx_count_distinct': 3},
 'num': {'approx_count_distinct': 3},
 'animals': {'approx_count_distinct': 4},
 'thing': {'approx_count_distinct': 4},
 'two strings': {'approx_count_distinct': 4},
 'filter': {'approx_count_distinct': 4},
 'num 2': {'approx_count_distinct': 4},
 'col_array': {'approx_count_distinct': 3},
 'col_int': {'approx_count_distinct': 4},
 'new_col_1': {'approx_count_distinct': 1}}

cols.replace(columns, search_and_replace=None, value=None, regex=None)¶

Replace a value or a list of values by a specified string

df.cols.replace("animals",["dog","cat"],"animals").table()

Replace “dog”,”cat” in column “animals” by the word “animals”:

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	animals	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	animals	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

Replace “dog-tv”, “cat”, “eagle”, “fish” in columns “two strings”,”animals” by “animals”:

df.cols.replace(["two strings","animals"], ["dog-tv", "cat", "eagle", "fish"], "animals").table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1
zombies	2	animals	tv	animals	b	2	[baby 1, sorry 1]	[3, 4]	1
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1
null	3	animals	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1

cols.nest(input_cols, output_col, shape=None, separator=” “)¶

Concat multiple columns to one with the format specified

df.cols.nest(["num", "new_col_1"], output_col = "col_nested", shape ="vector").table()

Merge two columns in a column vector:

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1	col_nested
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1	[1.0,1.0]
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1	[2.0,1.0]
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1	[2.0,1.0]
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1	[3.0,1.0]

Merge two columns in a string columns:

df.cols.nest(["animals", "two strings"], output_col= "col_nested", shape = "string").table()

words	num	animals	thing	two strings	filter	num 2	col_array	col_int	new_col_1	col_nested
I like fish	1	dog	housé	cat-car	a	1	[baby, sorry]	[1, 2, 3]	1	dog cat-car
zombies	2	cat	tv	dog-tv	b	2	[baby 1, sorry 1]	[3, 4]	1	cat dog-tv
simpsons cat lady	2	frog	table	eagle-tv-plus	1	3	[baby 2, sorry 2]	[5, 6, 7]	1	frog eagle-tv-plus
null	3	eagle	glass	lion-pc	c	4	[baby 3, sorry 3]	[7, 8]	1	eagle lion-pc