BigID API/Metadata Export Tutorial: Difference between revisions
No edit summary |
|||
(8 intermediate revisions by 2 users not shown) | |||
Line 11: | Line 11: | ||
{{:InformationTemplates:BigID_Catalog}} | {{:InformationTemplates:BigID_Catalog}} | ||
<html><center><img class="diagram" src="https:// | <html><center><img class="diagram" src="https://wiki-images.bigid.tools/cdn/objects.svg"/></center></html> | ||
For unstructured data, we discover information directly about and inside the files. For example, we can see that there are phone numbers within a file, or that a file matches a machine learning model for an invoice. | For unstructured data, we discover information directly about and inside the files. For example, we can see that there are phone numbers within a file, or that a file matches a machine learning model for an invoice. | ||
For structured data we discover data about the data inside of the columns as well as the columns themselves. For example we would know that a database ''Users'' has a column named State and that column contains values of the type US State Abbreviation. | For structured data we discover data about the data inside of the columns as well as the columns themselves. For example we would know that a database ''Users'' has a column named State and that column contains values of the type US State Abbreviation. This means that if we want information about specific columns we will need to perform an extra API request. The below API request shows how to retrieve the information about a specific table. The column parameter follows the format of Data Source Name.schema name.table name. | ||
<html> | |||
<iframe style="border:0px; width:100%; height:400px; border-radius:10px;" src="https://apiexplorer.bigid.tools/?url=data-catalog%2Fobject-details%2Fcolumns%3Fobject_name%3DDevices.rockstream.Devices&method=GET&headers=%5B%7B%22name%22%3A%22Authorization%22%2C%22value%22%3A%22SAMPLE%22%7D%5D"></iframe> | |||
</html> | |||
For unstructured data sources, you can use the information returned from the top level catalog call or use the attributes call to get more detailed information like below: | |||
<html> | |||
<iframe style="border:0px; width:100%; height:400px; border-radius:10px;" src="https://apiexplorer.bigid.tools/?url=data-catalog%2Fobject-details%2Fattributes%3Fobject_name%3DSales%2520Drive.devops%2540bigiddemo.com%252FEducation%252FProspects4-Restricted.docx&method=GET&headers=%5B%7B%22name%22%3A%22Authorization%22%2C%22value%22%3A%22SAMPLE%22%7D%5D"></iframe> | |||
</html> | |||
By using these three API calls you can find out information about any element being scanned by your BigID system. | |||
== Code Samples == | |||
<tabs> | |||
<tab name="Python"><syntaxhighlight lang="python" line> | |||
# Metadata Export Tutorial | |||
import requests | |||
import json | |||
base_url = "https://developer.bigid.com/api/v1" | |||
headers = { | |||
"Authorization": "Bearer SAMPLE", | |||
"Content-Type": "application/json" | |||
} | |||
# 1. Get all objects in the catalog | |||
url = f"{base_url}/data-catalog" | |||
res = requests.get(url, headers=headers) | |||
data = res.json() | |||
print(json.dumps(data, indent=2)) | |||
# 2. Retrieve information about a specific table | |||
url = f"{base_url}/data-catalog/object-details/columns?object_name=DataSourceName.schemaName.tableName" # Replace "DataSourceName.schemaName.tableName with desired table information" | |||
res = requests.get(url, headers=headers) | |||
data = res.json() | |||
print(json.dumps(data, indent=2)) | |||
# 3. Retrieve information about unstructured data source | |||
url = f"{base_url}/data-catalog/object-details/attributes?object_name=Sales%20Drive.devops%40bigiddemo.com%2FEducation%2FProspects4-Restricted.docx" # Example using attribute object_name to fetch detailed information | |||
res = requests.get(url, headers=headers) | |||
data = res.json() | |||
print(json.dumps(data, indent=2)) | |||
</syntaxhighlight></tab> | |||
<tab name="JavaScript"><syntaxhighlight lang="javascript" line> | |||
// Metadata Export Tutorial | |||
const baseUrl = "https://developer.bigid.com/api/v1"; | |||
const headers = { | |||
"Authorization": "Bearer SAMPLE", // Replace with your token | |||
"Content-Type": "application/json" | |||
}; | |||
// 1. Get all catalog objects | |||
async function getAllCatalogObjects() { | |||
const res = await fetch(`${baseUrl}/data-catalog`, { headers }); | |||
const data = await res.json(); | |||
console.log("All Catalog Objects:\n", JSON.stringify(data, null, 2)); | |||
return data; | |||
} | |||
// 2. Retrieve information about a specific structured table | |||
async function getTableMetadata() { | |||
const tableFQN = "DataSourceName.schemaName.tableName"; // Replace this with your actual table name | |||
const url = `${baseUrl}/data-catalog/object-details/columns?object_name=${encodeURIComponent(tableFQN)}`; | |||
console.log(`Fetching metadata for table: ${tableFQN}`); | |||
const res = await fetch(url, { headers }); | |||
const data = await res.json(); | |||
console.log("Structured Table Metadata:\n", JSON.stringify(data, null, 2)); | |||
return data; | |||
} | |||
// 3. Retrieve info about an unstructured object | |||
async function getUnstructuredAttributes() { | |||
const objectFQN = "Sales Drive.devops@bigiddemo.com/Education/Prospects4-Restricted.docx"; // Replace as needed | |||
const url = `${baseUrl}/data-catalog/object-details/attributes?object_name=${encodeURIComponent(objectFQN)}`; | |||
console.log(`Fetching metadata for unstructured object: ${objectFQN}`); | |||
const res = await fetch(url, { headers }); | |||
const data = await res.json(); | |||
console.log("Unstructured Object Metadata:\n", JSON.stringify(data, null, 2)); | |||
return data; | |||
} | |||
// Run all 3 steps | |||
(async () => { | |||
await getAllCatalogObjects(); | |||
await getTableMetadata(); | |||
await getUnstructuredAttributes(); | |||
})(); | |||
</syntaxhighlight> | |||
</tab> | |||
</tabs> | |||
<html> | |||
<style> | |||
.tabs-tabbox > .tabs-container { | |||
margin-top: -1px; | |||
padding: | |||
2px 6px; | |||
border-radius: | |||
8px; | |||
position: relative; | |||
border: | |||
2px solid #848484; | |||
width: inherit; | |||
max-width: inherit; | |||
min-width: inherit; | |||
box-shadow: | |||
0px 4px 6px 1px rgba(0, 0, 0, 0.1); | |||
z-index: 1; | |||
} | |||
.tabs-tabbox > .tabs-label { | |||
margin: | |||
0 3px; | |||
border-bottom: | |||
none; | |||
border-radius: | |||
4px 4px 0 0; | |||
position: relative; | |||
display: inline-block; | |||
vertical-align: bottom; | |||
padding-left: 10px; | |||
padding-right: 10px; | |||
padding-bottom: 3px; | |||
padding-top: 3px; | |||
} | |||
.tabs-tabbox > .tabs-input:checked + .tabs-label, .tabs-input-0:checked + .tabs-input-1 + .tabs-label { | |||
background-color: #0e69b2 !important; | |||
border-color: | |||
#848484; | |||
z-index: 0; | |||
color: white; | |||
} | |||
.tabs-label { | |||
cursor: pointer; | |||
border: | |||
2px solid #848484; | |||
} | |||
.mw-body .tabs-label { | |||
background-color: #ffffff26; | |||
} | |||
</style> | |||
</html> | |||
[[Category:Tutorial]][[Category:API]] |
Latest revision as of 19:05, 17 June 2025
- What the BigID data catalog can be used for
- What is the structure of the BigID Catalog
- How to retrieve data from each part of the BigID Catalog Structure
The BigID Catalog
The BigID catalog provides a view into all of your data. It allows you to see the types of data being held in each column, the access rights for that data and how it relates to other information across your systems. Below is a demo of the catalog through the BigID UI:
Since we care about the data within the API as opposed to the UI, let's see what the API response that powers this page looks like.
In the results section of the response you'll see a listing much like that in the UI. Each of these items is an object. An object is:
- A database table in a structured data source
- A file in an unstructured data source
For unstructured data, we discover information directly about and inside the files. For example, we can see that there are phone numbers within a file, or that a file matches a machine learning model for an invoice.
For structured data, we discover information about the columns themselves as well as the data inside them. For example, we would know that a database Users has a column named State and that the column contains values of the type US State Abbreviation. This means that if we want information about specific columns, we need to perform an extra API request. The API request below shows how to retrieve the column information for a specific table. The object_name parameter follows the format DataSourceName.schemaName.tableName.
For unstructured data sources, you can use the information returned from the top level catalog call or use the attributes call to get more detailed information like below:
By using these three API calls you can find out information about any element being scanned by your BigID system.
Code Samples
# Metadata Export Tutorial
#
# Runs the three catalog requests from this tutorial in order and pretty-prints
# each JSON response:
#   1. the top-level catalog listing,
#   2. the column details of one structured table,
#   3. the attribute details of one unstructured object.
import json
from urllib.parse import quote

import requests

base_url = "https://developer.bigid.com/api/v1"
headers = {
    "Authorization": "Bearer SAMPLE",  # Replace SAMPLE with your token
    "Content-Type": "application/json",
}

# 1. Get all objects in the catalog
url = f"{base_url}/data-catalog"
res = requests.get(url, headers=headers)
# Fail loudly on a 4xx/5xx answer instead of printing an error payload as if it were data.
res.raise_for_status()
data = res.json()
print(json.dumps(data, indent=2))

# 2. Retrieve information about a specific table
# Replace "DataSourceName.schemaName.tableName" with the desired table information.
table_fqn = "DataSourceName.schemaName.tableName"
# safe="" percent-encodes every reserved character, so any name can be dropped in unchanged.
url = f"{base_url}/data-catalog/object-details/columns?object_name={quote(table_fqn, safe='')}"
res = requests.get(url, headers=headers)
res.raise_for_status()
data = res.json()
print(json.dumps(data, indent=2))

# 3. Retrieve information about unstructured data source
# Example using the object_name attribute to fetch detailed information.
object_fqn = "Sales Drive.devops@bigiddemo.com/Education/Prospects4-Restricted.docx"
# Encodes to Sales%20Drive.devops%40bigiddemo.com%2FEducation%2FProspects4-Restricted.docx
url = f"{base_url}/data-catalog/object-details/attributes?object_name={quote(object_fqn, safe='')}"
res = requests.get(url, headers=headers)
res.raise_for_status()
data = res.json()
print(json.dumps(data, indent=2))
// Metadata Export Tutorial

// Root of the BigID REST API; the request functions in this sample append
// their endpoint paths to it.
const baseUrl = "https://developer.bigid.com/api/v1";

// Headers passed along with every fetch call in this sample.
const headers = {
  Authorization: "Bearer SAMPLE", // Replace with your token
  "Content-Type": "application/json",
};
// 1. Get all catalog objects
/**
 * Requests the top-level catalog listing, logs it as pretty-printed JSON and
 * returns the parsed body.
 *
 * @returns {Promise<any>} The parsed JSON body of the `/data-catalog` response.
 * @throws {Error} If the response status is not in the 2xx range.
 */
async function getAllCatalogObjects() {
  const res = await fetch(`${baseUrl}/data-catalog`, { headers });
  // fetch only rejects on network failure; HTTP errors must be checked explicitly.
  if (!res.ok) {
    throw new Error(`Catalog request failed: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  console.log("All Catalog Objects:\n", JSON.stringify(data, null, 2));
  return data;
}
// 2. Retrieve information about a specific structured table
/**
 * Requests the column details of one structured table, logs them as
 * pretty-printed JSON and returns the parsed body.
 *
 * @param {string} [tableFQN="DataSourceName.schemaName.tableName"] - Table to
 *   look up, in the form `DataSourceName.schemaName.tableName`. The default is
 *   a placeholder; replace it with (or pass) your actual table name.
 * @returns {Promise<any>} The parsed JSON body of the columns response.
 * @throws {Error} If the response status is not in the 2xx range.
 */
async function getTableMetadata(tableFQN = "DataSourceName.schemaName.tableName") {
  // encodeURIComponent keeps names with spaces, slashes, etc. valid inside the query string.
  const url = `${baseUrl}/data-catalog/object-details/columns?object_name=${encodeURIComponent(tableFQN)}`;
  console.log(`Fetching metadata for table: ${tableFQN}`);
  const res = await fetch(url, { headers });
  // fetch only rejects on network failure; HTTP errors must be checked explicitly.
  if (!res.ok) {
    throw new Error(`Table metadata request failed: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  console.log("Structured Table Metadata:\n", JSON.stringify(data, null, 2));
  return data;
}
// 3. Retrieve info about an unstructured object
/**
 * Requests the attribute details of one unstructured object (a file), logs
 * them as pretty-printed JSON and returns the parsed body.
 *
 * @param {string} [objectFQN] - Full object name to look up. Defaults to the
 *   tutorial's sample document; replace as needed.
 * @returns {Promise<any>} The parsed JSON body of the attributes response.
 * @throws {Error} If the response status is not in the 2xx range.
 */
async function getUnstructuredAttributes(
  objectFQN = "Sales Drive.devops@bigiddemo.com/Education/Prospects4-Restricted.docx",
) {
  // The name contains a space, "@" and "/" characters, so it must be percent-encoded.
  const url = `${baseUrl}/data-catalog/object-details/attributes?object_name=${encodeURIComponent(objectFQN)}`;
  console.log(`Fetching metadata for unstructured object: ${objectFQN}`);
  const res = await fetch(url, { headers });
  // fetch only rejects on network failure; HTTP errors must be checked explicitly.
  if (!res.ok) {
    throw new Error(`Unstructured object request failed: ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  console.log("Unstructured Object Metadata:\n", JSON.stringify(data, null, 2));
  return data;
}
// Run all 3 steps
// The steps run one after another; the first failure stops the sequence and is
// reported here, so the promise returned by the async IIFE never rejects unhandled.
(async () => {
  try {
    await getAllCatalogObjects();
    await getTableMetadata();
    await getUnstructuredAttributes();
  } catch (err) {
    console.error("Metadata export failed:", err);
  }
})();