pyprimed

Install

For staging run:

pip install --upgrade pyprimed --extra-index-url http://pypi-staging.primed.io.s3-website-eu-west-1.amazonaws.com --trusted-host pypi-staging.primed.io.s3-website-eu-west-1.amazonaws.com

And for production run:

pip install --upgrade pyprimed --extra-index-url http://pypi-dev.primed.io.s3-website-eu-west-1.amazonaws.com --trusted-host pypi-dev.primed.io.s3-website-eu-west-1.amazonaws.com

Quickstart

Import the SDK and initiate the connection

from pyprimed import Pio
from pyprimed import Model, Universe, Signal, Target, Campaign, Prediction, ABVariant
from pyprimed.util import PrimedConnection

conn = PrimedConnection(
  host="localhost", 
  port=5000, 
  user="user", 
  password="password",
  pubkey="pubkey",
  secretkey="secretkey")

pio = Pio(conn)

Create a Universe, and attach a few Targets

# create a new universe and attach a single target
pio\
  .universes\
  .create(name="myfirstuniverse")\
  .targets\
  .create(key="ARTICLE-1", value={"url": "www.example.com/article-1"})

# retrieving the newly created universe
u = pio.universes.find(name="myfirstuniverse").first

# list all targets currently associated with this universe
u.targets.all()

# prepare a list of new targets
new_targets = [
  {"key": "ARTICLE-2", "value": {"url": "www.example.com/article-2"}}, 
  {"key": "ARTICLE-3", "value": {"url": "www.example.com/article-3"}}
]

# create the new targets
u.targets.create(*new_targets)

# targets are upserted, which means that for a given key there
# can be only one instance in the database. Trying to create an
# instance with the same key will update the value of the record
# in the database
u.targets.create(key="ARTICLE-1", value={"url": "THIS IS NEW!"})
u.targets.find(key="ARTICLE-1").first.value 

Create a Model, and attach a few Signals

# create a new model and attach a single signal
pio\
  .models\
  .create(name="myfirstmodel")\
  .signals\
  .create(key="ALICE")


# retrieving the created model
m = pio.models.find(name="myfirstmodel").first

# list all signals currently associated with this model
m.signals.all() 

# prepare a list of new signals
new_signals = [
  {"key": "BOB"}, 
  {"key": "CHRIS"}
]

# create the new signals
m.signals.create(*new_signals)

# prepare a set of predictions
# WARNING: `signal_key` and `target_key` should always be a string!
predictions = [
  {"signal_key": "ALICE", "target_key": "ARTICLE-1", "score": 0.35},
  {"signal_key": "BOB", "target_key": "ARTICLE-1", "score": 0.75}, 
  {"signal_key": "CHRIS", "target_key": "ARTICLE-1", "score": 0.15}
]

# create the new predictions
m.predictions.create(*predictions)

Create a Campaign, and set up an AB test to start using the Predictions


# create a campaign
c = u.campaigns.create(key="test.campaign", name="myfirstcampaign")

# attach an abvariant to the campaign
c.abvariants.create(label="A", models=[{"uid": m.uid, "weight": 1.0}])

# get personalized!
c.personalize(keys=["BOB"], abvariant="A")

Combine PrimedIO with Apache Spark

With the addition of piospark PrimedIO offers functionality to more readily integrate with Apache Spark. Notably, the package provides PySpark UDFs that can do the heavy lifting of converting DataFrame columns to a format that is ready for PrimedIO to receive.

# df contains your predictions, we'll assume there are 
# at least the following columns:
#
#     signal_key
#     target_key
#     target_url
#     target_img
#     target_title
#     predicted_score
df

# import the piospark module
from pyprimed import piospark

# we use piospark.udf.signal() to obtain a udf that will 
# take care of creating our signals
signals = df\
  .withColumn("signal", piospark.udf.signal()('signal_key'))\
  .select("signal")

# we use piospark.udf.target() to obtain a udf that will 
# take care of creating our targets. Note that we can provide
# additional fields that will constitute the 'value' of the 
# `Target`. Keep in mind that the order of the fields as 
# specified in `piospark.udf.target('img', 'url', 'title')`
# has to be consistent with the order in which you call the 
# udf `('target_key', 'target_img', 'target_url', 'target_title')`
targets = df\
  .select('target_key', 'target_img', 'target_url', 'target_title')\
  .distinct()\
  .withColumn("target", piospark.udf.target('img', 'url', 'title')('target_key', 'target_img', 'target_url', 'target_title'))\
  .select("target")

# we use piospark.udf.prediction() to obtain a udf that will 
# take care of creating our predictions
predictions = df\
  .withColumn("prediction", piospark.udf.prediction()('signal_key', 'target_key', 'predicted_score'))\
  .select("prediction")

# upload the signals to a model with the name 'mymodel', 
# be careful that we do a `collect()` in this example.
# Also note that the `json.loads()` is needed, as
# the PySDK expects dictionaries, and not Strings.
pio\
  .models\
  .find(name="mymodel")\
  .first\
  .signals\
  .create(*[json.loads(row.signal) for row in signals.collect()])

# upload the targets to a universe with the name 'myuniverse', 
# be careful that we do a `collect()` in this example.
# Also note that the `json.loads()` is needed, as
# the PySDK expects dictionaries, and not Strings.
pio\
  .universes\
  .find(name="myuniverse")\
  .first\
  .targets\
  .create(*[json.loads(row.target) for row in targets.collect()])

# upload the predictions to a model with the name 'mymodel', 
# be careful that we do a `collect()` in this example.
# Also note that the `json.loads()` is needed, as
# the PySDK expects dictionaries, and not Strings.
pio\
  .models\
  .find(name="mymodel")\
  .first\
  .predictions\
  .create(*[json.loads(row.prediction) for row in predictions.collect()])
# add recency (TO BE DONE)
import datetime

u.targets.create(
    key="mytarget", 
    published_at=datetime.datetime.utcnow().isoformat(), 
    recency_histogram=[1.0, 0.5, 0.1], 
    recency_xmax=1800)