Market Basket Analysis
Market Basket Analysis
BASKET
ANALYSIS
A step-by-step guide to Market Basket Analysis using Python
1. Import Dataset
# Clean the transaction table in place before building the basket matrix.
df['Description'] = df['Description'].str.strip()
df.dropna(subset=['InvoiceNo'], axis=0, inplace=True)
df['InvoiceNo'] = df['InvoiceNo'].astype('str')

# Discard invoices whose number contains "C".
# NOTE(review): presumably these are credit notes / cancellations - confirm.
has_c_marker = df['InvoiceNo'].str.contains('C')
df = df[~has_c_marker]

# Pivot the French orders into one row per invoice and one column per
# product description; each cell holds the summed quantity (0 when the
# product does not appear on that invoice).
france_rows = df[df['Country'] == "France"]
item_quantities = france_rows.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
basket = (
    item_quantities
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)
def encode_units(x):
    """Collapse a purchased quantity into a 0/1 presence flag.

    Args:
        x: Numeric quantity of one product on one invoice.

    Returns:
        1 when at least one unit was bought (``x >= 1``), otherwise 0.
        Values the original thresholds left uncovered (0 < x < 1, NaN)
        now map to 0 instead of falling through to an implicit ``None``.
    """
    # A single comparison covers every input, so no value can slip
    # between the two original branches. NaN compares False and yields 0.
    return 1 if x >= 1 else 0
# Turn the quantity matrix into a 0/1 matrix, cell by cell.
basket_sets = basket.applymap(encode_units)

# Remove the 'POSTAGE' column before mining.
# NOTE(review): presumably a shipping charge rather than a product - confirm.
basket_sets.drop('POSTAGE', inplace=True, axis=1)

# Itemsets that occur in at least 7% of the invoices.
frequent_itemsets = apriori(basket_sets, min_support=0.07, use_colnames=True)

# Keep only rules whose lift is at least 1.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Bare expression: shows the first rows when run in a notebook cell.
rules.head()
.sum().unstack().reset_index().fillna(0)
.set_index('InvoiceNo'))
# Turn the second quantity matrix into a 0/1 matrix, cell by cell.
basket_sets2 = basket2.applymap(encode_units)

# Remove the 'POSTAGE' column before mining.
# NOTE(review): presumably a shipping charge rather than a product - confirm.
basket_sets2.drop('POSTAGE', inplace=True, axis=1)

# Itemsets that occur in at least 5% of the invoices.
frequent_itemsets2 = apriori(basket_sets2, min_support=0.05, use_colnames=True)

# Keep only rules whose lift is at least 1.
rules2 = association_rules(frequent_itemsets2, metric="lift", min_threshold=1)
# Build the transaction list: one list of 20 strings per row of st_df.
# The array is fetched once; the original re-evaluated st_df.values for
# every single cell.
# NOTE(review): rows start at index 1, so row 0 is skipped, and the bounds
# 7501 / 20 are hard-coded to one particular file - confirm both are intended.
cells = st_df.values
l = [[str(cells[i, j]) for j in range(0, 20)] for i in range(1, 7501)]

# st_df = pd.read_csv("FILE NAME.csv", header=None)
# print(st_df)
# OR this to get data from user

# NOTE(review): this rebinds `association_rules`, which earlier code in this
# file calls as a function; the name is kept here for compatibility.
association_rules = apriori(
    l,
    min_support=0.0045,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
)
# Materialise the result so it can be iterated more than once.
association_results = list(association_rules)
# Print one summary per mined record.
# NOTE(review): the indexing assumes each record is laid out as
# (items, support, ordered_statistics) and each ordered statistic as
# (items_base, items_add, confidence, lift) - confirm against the library
# that produced association_results.
for item in association_results:
    # Confidence and lift are read from the first ordered statistic, so the
    # rule text is taken from that same statistic. Iterating the itemset
    # gives no guaranteed order, can print the rule in the wrong direction,
    # and shows only two items of a larger itemset.
    first_stat = item[2][0]
    base_items = ", ".join(sorted(first_stat[0]))
    add_items = ", ".join(sorted(first_stat[1]))
    print("Rule: " + base_items + " -> " + add_items)

    # Second field of the record: support of the whole itemset.
    print("Support: " + str(item[1]))

    # Third and fourth fields of the first ordered statistic.
    print("Confidence: " + str(first_stat[2]))
    print("Lift: " + str(first_stat[3]))
    print("---------------------------------"
          "--------------------")