Scan Profiles API Tutorial: Difference between revisions
No edit summary |
No edit summary |
||
Line 129: | Line 129: | ||
# --- 2. Gather Data Source IDs --- | # --- 2. Gather Data Source IDs --- | ||
ds_connections_url = f"{ | ds_connections_url = f"{base_url}/ds-connections" | ||
try: | try: | ||
Line 137: | Line 137: | ||
data_sources = response.json() | data_sources = response.json() | ||
# | |||
# We will look for data sources of these types. | |||
print( | target_types = {'snowflake', 's3'} | ||
target_data_source_ids = [] | |||
# Access the list via the 'data' object | |||
connections_list = data_sources_response.get('data', {}).get('ds_connections', []) | |||
for ds in connections_list: | |||
if ds.get('type') in target_types: | |||
print(f" -> Found matching source: {ds.get('name')} (ID: {ds.get('_id')})") | |||
target_data_source_ids.append(ds.get('_id')) | |||
if not target_data_source_ids: | |||
print("Could not find any data sources with the specified types. Exiting.") | |||
exit() | |||
# --- 3. Create a Scan Profile --- | # --- 3. Create a Scan Profile --- | ||
scan_profiles_url = f"{base_url}/scanProfiles" | |||
scan_profiles_url = f"{ | |||
# The payload defines the new scan profile. | # The payload defines the new scan profile. | ||
new_profile_data = { | new_profile_data = { | ||
"name": " | "name": "Dynamic Marketing Data Scan", | ||
"description": "Scans | "description": "Scans all Snowflake and S3 marketing sources", | ||
"dataSourceList": | "dataSourceList": target_data_source_ids, | ||
"scanTemplateId": "standard-privacy-template-001", | "scanTemplateId": "standard-privacy-template-001", | ||
"isSingleRunScan": True | "isSingleRunScan": True | ||
Line 165: | Line 174: | ||
if not new_profile_id: | if not new_profile_id: | ||
raise ValueError("Failed to get ID from the created profile response.") | raise ValueError("Failed to get ID from the created profile response.") | ||
# --- 4. Verify Scan Profile Creation --- | # --- 4. Verify Scan Profile Creation --- | ||
verify_url = f"{scan_profiles_url}/{new_profile_id}" | verify_url = f"{scan_profiles_url}/{new_profile_id}" | ||
Line 178: | Line 183: | ||
verified_profile = response_verify.json() | verified_profile = response_verify.json() | ||
print("Verification successful! The profile was created correctly.") | print("Verification successful! The profile was created correctly.") | ||
except requests.exceptions.HTTPError as http_err: | except requests.exceptions.HTTPError as http_err: | ||
Line 188: | Line 192: | ||
<tab name="JavaScript"><syntaxhighlight lang="javascript" line> | <tab name="JavaScript"><syntaxhighlight lang="javascript" line> | ||
// Scan Profiles API Tutorial in progress | // Scan Profiles API Tutorial in progress | ||
const fetch = require('node-fetch'); | |||
// --- 1. Setup and Authentication --- | |||
# Base URL of the BigID API (training sandbox) | |||
base_url = "https://developer.bigid.com/api/v1" | |||
const AUTH_TOKEN = "SAMPLE"; | |||
const headers = { | |||
"Authorization": `Bearer ${AUTH_TOKEN}`, | |||
"Content-Type": "application/json" | |||
}; | |||
async function runBigIDWorkflow() { | |||
try { | |||
// --- 2. Gather and Parse Data Source IDs --- | |||
const dsConnectionsUrl = `${base_url}/ds-connections`; | |||
const dsResponse = await fetch(dsConnectionsUrl, { headers }); | |||
if (!dsResponse.ok) throw new Error(`HTTP error! Status: ${dsResponse.status}`); | |||
const dataSourcesResponse = await dsResponse.json(); | |||
const targetTypes = ['snowflake', 's3']; | |||
// Access the list via the 'data' object. | |||
const connectionsList = (dataSourcesResponse.data && dataSourcesResponse.data.ds_connections) || []; | |||
const targetDataSourceIds = connectionsList | |||
.filter(ds => targetTypes.includes(ds.type)) | |||
.map(ds => { | |||
console.log(` -> Found matching source: ${ds.name} (ID: ${ds._id})`); | |||
return ds._id; | |||
}); | |||
if (targetDataSourceIds.length === 0) { | |||
console.log("Could not find any data sources with the specified types. Exiting."); | |||
return; | |||
} | |||
// --- 3. Create a Scan Profile Using Dynamic IDs --- | |||
const scanProfilesUrl = `${base_url}/scanProfiles`; | |||
const newProfileData = { | |||
"name": "Dynamic Marketing Data Scan", | |||
"description": "Scans all Snowflake and S3 marketing sources", | |||
"dataSourceList": targetDataSourceIds, | |||
"scanTemplateId": "standard-privacy-template-001", | |||
"isSingleRunScan": true | |||
}; | |||
const createResponse = await fetch(scanProfilesUrl, { | |||
method: 'POST', | |||
headers: headers, | |||
body: JSON.stringify(newProfileData) | |||
}); | |||
if (!createResponse.ok) throw new Error(`HTTP error! Status: ${createResponse.status}`); | |||
const createdProfile = await createResponse.json(); | |||
const newProfileId = createdProfile._id; | |||
if (!newProfileId) throw new Error("Failed to get ID from created profile."); | |||
// --- 4. Verify Scan Profile Creation --- | |||
const verifyUrl = `${scanProfilesUrl}/${newProfileId}`; | |||
const verifyResponse = await fetch(verifyUrl, { headers }); | |||
if (!verifyResponse.ok) throw new Error(`HTTP error! Status: ${verifyResponse.status}`); | |||
const verifiedProfile = await verifyResponse.json(); | |||
console.log("Verification successful! The profile was created correctly."); | |||
console.log(JSON.stringify(verifiedProfile, null, 2)); | |||
} catch (error) { | |||
console.error("An error occurred during the workflow:", error); | |||
} | |||
} | |||
runBigIDWorkflow(); | |||
</syntaxhighlight> | </syntaxhighlight> | ||
</tab> | </tab> |
Revision as of 13:25, 5 August 2025
- How to create a new scan profile using the Scan Profiles API.
- How to specify only the data sources you want to scan.
- How to configure the scan as a one-time or scheduled job.
- How to verify the scan profile was created successfully.
In this tutorial, we'll use SAMPLE as our session token. This is unique to the training sandbox and will not work in other environments. See BigID API/Tutorial for information on authenticating with BigID.
To view the complete code for all steps, see the section labelled Code Samples.
For more information on the API capabilities used in this tutorial, check out the Scan Profiles API Docs or the Data Sources API Docs.
1. Authenticate Using Your API Key
All API requests require authentication using a valid API key. Refer to BigID Documentation to obtain your token. Then, define the Authorization header using the format `Authorization: Bearer YOUR_API_KEY`. This header must be included in every request to ensure proper authentication and access to BigID’s API endpoints. Throughout the tutorial, we will be using SAMPLE as our token.
2. Gather Data Source IDs
Before creating your scan profile, you need the unique IDs of the data sources you want to include in the scan. If you have not already obtained them, whether by browsing the BigID UI or by using the API, this step shows how to retrieve the correct identifiers so you can specify exactly which sources to scan.
You can retrieve this information using the GET /api/v1/ds-connections endpoint. This API returns the details of one or more data sources in your BigID environment.
Depending on which data sources you're interested in, this endpoint supports several optional query parameters to help you narrow down and customize the results:
- skip (integer): Number of data sources to skip for pagination.
- limit (integer): Maximum number of data sources to return.
- requireTotalCount (boolean): If true, returns the total count of matching data sources.
- sort (string): Sort results by a specified field and order.
- filter (string): Filter results based on field values.
The response provides common fields for each data source. From this, you can compile the _id values for all the data sources you want to include in your scan profile. These IDs will be used in the dataSourceList field when creating the scan profile in the next step.
3. Create a Scan Profile
Once you have a list of all the data sources you’d like to include in the scan, you can proceed to create a new scan profile using the Scan Profiles API. This profile defines what you want to scan, when you want to scan it, and how it should behave.
Use the POST /api/v1/scanProfiles endpoint to define the profile details, including:
- A name and optional description
- A dataSourceList containing the IDs of the data sources to scan (targeted sources only)
- A valid scanTemplateId
- A schedule for recurring scans or isSingleRunScan: true for a one-time run
For example, to establish a scan profile named "Targeted Marketing Data Scan" that runs a one-time scan on two specific data sources (e.g., marketing-related databases), you would make the request using the following details:
{
"name": "Targeted Marketing Data Scan",
"description": "Scans only marketing-related sources for compliance",
"dataSourceList": [
"64f7df00cc834f0001a44e85",
"64f7df00cc834f0001a44e86"
],
"scanTemplateId": "standard-privacy-template-001",
"isSingleRunScan": true
}
Once submitted, BigID will create the scan profile and queue the scan based on the schedule you've defined (or start it right away for a one-time scan). Upon success, the API response will return the newly created scan profile, including its unique ID. Be sure to save this ID, as you’ll need it in the next step to verify and manage the scan profile.
4. Verify Scan Profile Creation
After creating your scan profile, you’ll want to double-check that BigID received and saved it correctly. Using the ID returned in the previous step (the _id field of the creation response), you can retrieve the scan profile directly to confirm its details and status.
Use the GET /api/v1/scanProfiles/{id} endpoint, replacing {id} with your unique scan profile id returned from the creation step, to retrieve and verify the details of your scan profile.
At this step, you’re primarily confirming a successful API response and that the scan profile exists with the expected ID. If the request returns a valid profile object (status code 200), you know your scan profile was created correctly and is ready to rock!
5. Troubleshooting
If your request fails, here’s what the server might tell you, and how to fix it:
Status Code | Example Response | What It Means | How to Fix It |
---|---|---|---|
200 | Successful response with scan data | Everything’s looking good! | Keep cruising. |
400 | { "error": "Scan ID is invalid" } | Bad or malformed scan ID provided | Double-check the scan ID you’re using. |
404 | { "error": "Scan 1234 was not found" } | Scan ID doesn’t exist | Make sure the ID is valid and fetched from the parent scans endpoint. |
401 | Unauthorized | API key missing or invalid | Verify your API key and authorization header. |
500 | { "status": "error", "message": "Server error", "errors": [{}] } | BigID server hit a snag (internal error) | Wait a moment and retry. If it persists, reach out to support. |
Code Samples
# Scan Profiles API Tutorial
import requests
import json
# --- 1. Setup and Authentication ---
# Base URL of the BigID API (training sandbox)
base_url = "https://developer.bigid.com/api/v1"
# Replace SAMPLE with your actual API key
AUTH_TOKEN = "SAMPLE"
# Every request below sends these headers.
headers = {
    "Authorization": f"Bearer {AUTH_TOKEN}",
    "Content-Type": "application/json"
}

# --- 2. Gather Data Source IDs ---
ds_connections_url = f"{base_url}/ds-connections"
try:
    # We'll add a limit to get a small number of data sources for this example.
    response = requests.get(f"{ds_connections_url}?limit=5", headers=headers)
    response.raise_for_status()  # Raise an error for bad status codes (4xx or 5xx)
    # The parsed body must be bound to the same name that is read below.
    # Previously it was stored as `data_sources` but read back as
    # `data_sources_response`, which raised NameError on every run.
    data_sources_response = response.json()

    # We will look for data sources of these types.
    target_types = {'snowflake', 's3'}
    target_data_source_ids = []
    # Access the list via the 'data' object; missing keys fall back to an
    # empty list so the loop below is simply skipped.
    connections_list = data_sources_response.get('data', {}).get('ds_connections', [])
    for ds in connections_list:
        if ds.get('type') in target_types:
            print(f" -> Found matching source: {ds.get('name')} (ID: {ds.get('_id')})")
            target_data_source_ids.append(ds.get('_id'))

    if not target_data_source_ids:
        print("Could not find any data sources with the specified types. Exiting.")
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below and ends the script with exit status 0. Raising it
        # directly avoids relying on the interactive-only `exit()` helper.
        raise SystemExit(0)

    # --- 3. Create a Scan Profile ---
    scan_profiles_url = f"{base_url}/scanProfiles"
    # The payload defines the new scan profile.
    new_profile_data = {
        "name": "Dynamic Marketing Data Scan",
        "description": "Scans all Snowflake and S3 marketing sources",
        "dataSourceList": target_data_source_ids,
        "scanTemplateId": "standard-privacy-template-001",
        "isSingleRunScan": True
    }
    response_create = requests.post(scan_profiles_url, headers=headers, json=new_profile_data)
    response_create.raise_for_status()
    created_profile = response_create.json()
    # The ID of the new profile is read from the '_id' field of the response.
    new_profile_id = created_profile.get("_id")
    if not new_profile_id:
        raise ValueError("Failed to get ID from the created profile response.")

    # --- 4. Verify Scan Profile Creation ---
    # Fetch the profile back by ID; a non-error status confirms it exists.
    verify_url = f"{scan_profiles_url}/{new_profile_id}"
    response_verify = requests.get(verify_url, headers=headers)
    response_verify.raise_for_status()
    verified_profile = response_verify.json()
    print("Verification successful! The profile was created correctly.")
except requests.exceptions.HTTPError as http_err:
    # Raised by raise_for_status(); include the response body to aid debugging.
    print(f"HTTP error occurred: {http_err}")
    print(f"Response content: {http_err.response.text}")
except Exception as err:
    # Covers everything else, including the ValueError raised above.
    print(f"An other error occurred: {err}")
// Scan Profiles API Tutorial in progress
const fetch = require('node-fetch');
// --- 1. Setup and Authentication ---
// Base URL of the BigID API (training sandbox).
// Uses a `//` comment and a `const` declaration: the previous `#` comment
// was Python syntax and is a SyntaxError in JavaScript, and the assignment
// had no declaration. The name is kept as `base_url` because the workflow
// function below refers to it by that name.
const base_url = "https://developer.bigid.com/api/v1";
// Replace SAMPLE with your actual API key.
const AUTH_TOKEN = "SAMPLE";
// Sent with every request made by the workflow.
const headers = {
  "Authorization": `Bearer ${AUTH_TOKEN}`,
  "Content-Type": "application/json"
};
/**
 * Runs the tutorial workflow end to end:
 *   2. fetches the data source connections and collects the IDs of those
 *      whose type is 'snowflake' or 's3',
 *   3. creates a one-time scan profile targeting those IDs,
 *   4. fetches the new profile back by ID to confirm it exists.
 *
 * Relies on `base_url`, `headers` and `fetch` from the enclosing script.
 * Any failure (network error, non-OK status, missing `_id`) is logged and
 * marks the process as failed; nothing is rethrown.
 *
 * @returns {Promise<void>} Resolves when the workflow finishes or stops early.
 */
async function runBigIDWorkflow() {
  try {
    // --- 2. Gather and Parse Data Source IDs ---
    const dsConnectionsUrl = `${base_url}/ds-connections`;
    const dsResponse = await fetch(dsConnectionsUrl, { headers });
    if (!dsResponse.ok) throw new Error(`HTTP error! Status: ${dsResponse.status}`);
    const dataSourcesResponse = await dsResponse.json();

    const targetTypes = ['snowflake', 's3'];
    // Access the list via the 'data' object; fall back to an empty list
    // when either level is missing.
    const connectionsList = (dataSourcesResponse.data && dataSourcesResponse.data.ds_connections) || [];
    const matchingSources = connectionsList.filter((ds) => targetTypes.includes(ds.type));
    // Log in a separate loop so that map() below stays a pure transformation.
    for (const ds of matchingSources) {
      console.log(` -> Found matching source: ${ds.name} (ID: ${ds._id})`);
    }
    const targetDataSourceIds = matchingSources.map((ds) => ds._id);

    if (targetDataSourceIds.length === 0) {
      console.log("Could not find any data sources with the specified types. Exiting.");
      return;
    }

    // --- 3. Create a Scan Profile Using Dynamic IDs ---
    const scanProfilesUrl = `${base_url}/scanProfiles`;
    const newProfileData = {
      "name": "Dynamic Marketing Data Scan",
      "description": "Scans all Snowflake and S3 marketing sources",
      "dataSourceList": targetDataSourceIds,
      "scanTemplateId": "standard-privacy-template-001",
      "isSingleRunScan": true
    };
    const createResponse = await fetch(scanProfilesUrl, {
      method: 'POST',
      headers: headers,
      body: JSON.stringify(newProfileData)
    });
    if (!createResponse.ok) throw new Error(`HTTP error! Status: ${createResponse.status}`);
    const createdProfile = await createResponse.json();
    // The ID of the new profile is read from the '_id' field of the response.
    const newProfileId = createdProfile._id;
    if (!newProfileId) throw new Error("Failed to get ID from created profile.");

    // --- 4. Verify Scan Profile Creation ---
    const verifyUrl = `${scanProfilesUrl}/${newProfileId}`;
    const verifyResponse = await fetch(verifyUrl, { headers });
    if (!verifyResponse.ok) throw new Error(`HTTP error! Status: ${verifyResponse.status}`);
    const verifiedProfile = await verifyResponse.json();
    console.log("Verification successful! The profile was created correctly.");
    console.log(JSON.stringify(verifiedProfile, null, 2));
  } catch (error) {
    console.error("An error occurred during the workflow:", error);
    // Report failure to the caller of the script instead of exiting with 0.
    process.exitCode = 1;
  }
}

runBigIDWorkflow();
Summary
Congratulations! In this tutorial, you have learned how to create a targeted scan profile in BigID by specifying the exact data sources you want to include. You’ve mastered how to submit a scan profile via the API, retrieve its unique ID, and verify that the profile was successfully created and saved.
Now that you’ve set up and verified your scan profile, you can take it further by monitoring or managing scan execution using the Scan Insights API.